diff --git a/Cargo.toml b/Cargo.toml index e2c07da3..c8fec285 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ members = [ "cli", "commons", - "core", + "core", "frontend", "inline", "parser", "render", diff --git a/commons/src/test_runner/snap_test_runner.rs b/commons/src/test_runner/snap_test_runner.rs index c3637be7..bd6d0be3 100644 --- a/commons/src/test_runner/snap_test_runner.rs +++ b/commons/src/test_runner/snap_test_runner.rs @@ -1,4 +1,3 @@ -use crate::lexer::token::Token; use serde::Serialize; pub use insta::{assert_snapshot, Settings}; @@ -14,17 +13,17 @@ pub struct SnapTestRunner<'a, I = ()> { } impl<'a> SnapTestRunner<'a> { - pub fn with_fn(name: &str, input: &'a S, mut parser: PF) -> SnapTestRunner<'a, ()> + pub fn with_fn(name: &str, input: &'a I, mut func: F) -> SnapTestRunner<'a, ()> where - S: AsRef<[Token<'a>]>, - PF: for<'s, 'i> FnMut(&'s [Token<'i>]) -> String, + I: AsRef, + F: FnMut(&I) -> String, { - let snapshot = parser(input.as_ref()); + let snapshot = func(input); SnapTestRunner { info: None, desc: None, - input: Token::flatten(input.as_ref()), + input: Some(input.as_ref()), name: name.into(), sub_path: None, snapshot, diff --git a/core/tests/runner/mod.rs b/core/tests/runner/mod.rs index 29dbae67..2f4fcc35 100644 --- a/core/tests/runner/mod.rs +++ b/core/tests/runner/mod.rs @@ -65,20 +65,19 @@ fn run_spec_test(case: test_runner::test_file::TestCase) { } fn run_snap_test(case: test_runner::test_file::TestCase) { - let tokens = unimarkup_commons::lexer::token::lex_str(&case.test.input); - - let mut snap_runner = SnapTestRunner::with_fn::<_, _>(&case.test.name, &tokens, |_input| { - let um = unimarkup_core::parser::parse_unimarkup( - &case.test.input, - unimarkup_commons::config::Config::default(), - ); - - Snapshot(um.blocks).as_snapshot() - }) - .with_info(format!( - "Test '{}' from: {}", - case.test.name, case.file_path - )); + let mut snap_runner = + SnapTestRunner::with_fn::<_, _>(&case.test.name, &case.test.input, |_input| { + let um = unimarkup_core::parser::parse_unimarkup( + &case.test.input, + unimarkup_commons::config::Config::default(), + ); + + Snapshot(um.blocks).as_snapshot() + }) + .with_info(format!( + "Test '{}' from: {}", + case.test.name, case.file_path + )); if let Some(ref description) = case.test.description { snap_runner = snap_runner.with_description(description); diff --git a/frontend/Cargo.toml b/frontend/Cargo.toml new file mode 100644 index 00000000..a0509540 --- /dev/null +++ b/frontend/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "unimarkup-frontend" +version.workspace = true +edition.workspace = true +authors.workspace = true +description.workspace = true +repository.workspace = true +homepage.workspace = true +readme.workspace = true +license.workspace = true + +[[test]] +name="unimarkup-frontend" +path="tests/snapshots.rs" +harness=false + +[dependencies] +icu_properties = "1.3.2" +ribbon = "0.7.0" +strum = "0.22.0" +strum_macros = "0.22.0" + +[dev-dependencies] +unimarkup-commons = { path ="../commons/", version = "0", features = ["test_runner"] } +libtest-mimic = "0.6.1" diff --git a/frontend/src/lexer/mod.rs b/frontend/src/lexer/mod.rs new file mode 100644 index 00000000..dcd63623 --- /dev/null +++ b/frontend/src/lexer/mod.rs @@ -0,0 +1,467 @@ +#![allow(dead_code)] +pub mod token; +pub mod token_kind; + +use ribbon::{Enroll, Ribbon, Tape}; +use token::Token; +use token_kind::TokenKind; + +use crate::scanner::SymbolStream; +use crate::symbol::{Symbol, SymbolKind}; + +/// Lexes the indentation token. 
Indentation is defined as some number of spaces at the beginning +/// of a line. +fn indentation_or_blankline<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + // at least one space token is seen by the caller + let mut indent = 1; + let mut span = start_sym.span; + + // make sure we have all spaces + sym_stream.expand_while(|s| s.kind == SymbolKind::Space); + + while let Some(sym) = sym_stream.pop_front() { + if sym.kind == SymbolKind::Space { + indent += 1; + span.len += sym.span.len; + } + } + + sym_stream.expand(); + + match sym_stream.peek_front() { + Some(sym) if sym.kind == SymbolKind::Newline => { + // indentation means there was a newline, then spaces + // and now again a newline, meaning we found a line that's blank + span.len += sym.len(); + Token { + input: start_sym.input, + kind: TokenKind::Blankline, + span, + } + } + _ => Token { + input: start_sym.input, + kind: TokenKind::Indentation(indent), + span, + }, + } +} + +fn identifier<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut pos_info = start_sym.span; + + sym_stream.expand_while(|s| s.kind == SymbolKind::Plain); + + while let Some(sym) = sym_stream.pop_front() { + pos_info.len += sym.span.len; + } + + Token { + input: start_sym.input, + kind: TokenKind::Plain, + span: pos_info, + } +} + +fn punctuation<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut pos_info = start_sym.span; + + sym_stream.expand_while(|s| s.kind == SymbolKind::TerminalPunctuation); + + while let Some(sym) = sym_stream.pop_front() { + // TODO: how do we handle multiple punctuation symbols? Should it be one symbol? + // e.g.: This sentence ends with three dots... + // ^^^ - should this be one token? 
+ pos_info.len += sym.span.len; + } + + Token { + input: start_sym.input, + kind: TokenKind::TerminalPunctuation, + span: pos_info, + } +} + +fn whitespace<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut span = start_sym.span; + + sym_stream.expand_while(|s| s.kind == SymbolKind::Whitespace); + + while let Some(sym) = sym_stream.pop_front() { + span.len += sym.len(); + } + + Token { + input: start_sym.input, + kind: TokenKind::Whitespace, + span, + } +} + +fn plain<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut span = start_sym.span; + + sym_stream.expand_while(|s| s.kind == start_sym.kind); + + while let Some(sym) = sym_stream.pop_front() { + span.len += sym.len(); + } + + Token { + input: start_sym.input, + kind: TokenKind::Plain, + span, + } +} + +fn repeated<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut span = start_sym.span; + + sym_stream.expand_while(|symbol| symbol.kind == start_sym.kind); + + while let Some(sym) = sym_stream.pop_front() { + span.len += sym.len(); + } + + Token { + input: start_sym.input, + kind: TokenKind::from((start_sym.kind, span.len)), + span, + } +} + +fn escaped<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut span = start_sym.span; + + sym_stream.expand(); + + let sym = sym_stream.pop_front().expect("Unexpected EOI after '\\'"); + span.len += sym.len(); + + Token { + input: start_sym.input, + kind: TokenKind::Plain, + span, + } +} + +fn whitespace_or_blankline<'input>( + start_sym: &Symbol<'input>, + sym_stream: &mut Tape>, +) -> Token<'input> { + let mut span = start_sym.span; + + sym_stream.expand_while(|s| s.kind == start_sym.kind); + + while let Some(sym) = sym_stream.pop_front() { + span.len += sym.len(); + } + + sym_stream.expand(); + + match sym_stream.peek_front() { + Some(sym) if sym.kind == SymbolKind::Newline => { + span.len += sym.len(); + Token { + input: start_sym.input, + kind: TokenKind::Blankline, + span, + } + } + + _ => Token { + input: start_sym.input, + kind: TokenKind::Whitespace, + span, + }, + } +} + +pub struct TokenStream<'input> { + input: &'input str, + sym_stream: Tape>, + last_newline_offs: u32, +} + +impl<'input> TokenStream<'input> { + pub fn tokenize(input: &'input str) -> Self { + let sym_stream = SymbolStream::scan_str(input).tape(); + + Self { + input, + sym_stream, + last_newline_offs: 0, + } + } +} + +impl<'input> Iterator for TokenStream<'input> { + type Item = Token<'input>; + + fn next(&mut self) -> Option { + loop { + let sym = self.sym_stream.next()?; + + match sym.kind { + SymbolKind::Space => { + if self.is_start_of_line(&sym) { + return Some(indentation_or_blankline(&sym, &mut self.sym_stream)); + } else { + return Some(Token { + input: self.input, + kind: TokenKind::Whitespace, + span: sym.span, + }); + } + } + + SymbolKind::Newline => { + let input = self.input; + let span = sym.span; + + let kind = if self.is_start_of_line(&sym) { + TokenKind::Blankline + } else { + TokenKind::Newline + }; + + self.last_newline_offs = sym.span.offs; + return Some(Token { input, kind, span }); + } + + SymbolKind::Backslash => { + self.sym_stream.expand(); + + if matches!( + self.sym_stream.peek_front(), + Some(Symbol { + kind: SymbolKind::Newline, + .. + }) + ) { + // skip the newline! 
+ self.sym_stream.pop_front(); + } else { + return Some(escaped(&sym, &mut self.sym_stream)); + } + } + + SymbolKind::TerminalPunctuation => { + return Some(punctuation(&sym, &mut self.sym_stream)) + } + + SymbolKind::Whitespace => { + if self.is_start_of_line(&sym) { + return Some(whitespace_or_blankline(&sym, &mut self.sym_stream)); + } else { + return Some(whitespace(&sym, &mut self.sym_stream)); + } + } + + SymbolKind::Eoi => return None, + + SymbolKind::Hash => { + return Some(repeated(&sym, &mut self.sym_stream)); + } + + SymbolKind::Star + | SymbolKind::Tick + | SymbolKind::Tilde + | SymbolKind::Underline + | SymbolKind::Caret + | SymbolKind::Quote + | SymbolKind::Dollar + | SymbolKind::Colon + | SymbolKind::Pipe + | SymbolKind::Plus + | SymbolKind::Dot + | SymbolKind::Ampersand + | SymbolKind::Comma + | SymbolKind::OpenParenthesis + | SymbolKind::CloseParenthesis + | SymbolKind::OpenBracket + | SymbolKind::CloseBracket + | SymbolKind::OpenBrace + | SymbolKind::CloseBrace => return Some(repeated(&sym, &mut self.sym_stream)), + + SymbolKind::Plain => return Some(plain(&sym, &mut self.sym_stream)), + + _other => { + return Some(identifier(&sym, &mut self.sym_stream)); + } + } + } + } +} + +impl TokenStream<'_> { + fn is_start_of_line(&self, sym: &Symbol<'_>) -> bool { + if self.last_newline_offs == 0 { + sym.span.offs == 0 + } else { + sym.span.offs.saturating_sub(self.last_newline_offs) == 1 + } + } +} + +#[cfg(test)] +mod tests { + use crate::lexer::token_kind::TokenKind; + + use super::TokenStream; + + #[test] + fn indentation() { + let input = " hello"; + let tokens: Vec<_> = super::TokenStream::tokenize(input).collect(); + + assert_eq!(tokens.len(), 2); + assert_eq!(tokens.first().unwrap().kind, TokenKind::Indentation(4)); + + let second = tokens.get(1).unwrap(); + assert_eq!(second.kind, TokenKind::Plain); + assert_eq!(second.as_input_str(), "hello"); + } + + #[test] + fn multi_line_indent() { + let input = " hello\n there"; + let tokens: Vec<_> = dbg!(super::TokenStream::tokenize(input).collect()); + + assert_eq!(tokens.len(), 5); + + let first = tokens.first().unwrap(); + assert_eq!(first.kind, TokenKind::Indentation(4)); + + let second = tokens.get(1).unwrap(); + assert_eq!(second.kind, TokenKind::Plain); + assert_eq!(second.as_input_str(), "hello"); + + let third = tokens.get(2).unwrap(); + + assert_eq!(third.kind, TokenKind::Newline); + + let fourth = tokens.get(3).unwrap(); + assert_eq!(fourth.kind, TokenKind::Indentation(6)); + + let fifth = tokens.get(4).unwrap(); + assert_eq!(fifth.kind, TokenKind::Plain); + assert_eq!(fifth.as_input_str(), "there"); + } + + #[test] + fn lf_newline() { + let input = "hello\nthere"; + + let tokens: Vec<_> = dbg!(TokenStream::tokenize(input).collect()); + + assert_eq!(tokens.len(), 3); + + let first = tokens.first().unwrap(); + assert_eq!(first.kind, TokenKind::Plain); + + assert_eq!(first.as_input_str(), "hello"); + + let second = tokens.get(1).unwrap(); + assert_eq!(second.kind, TokenKind::Newline); + + let third = tokens.get(2).unwrap(); + assert_eq!(third.kind, TokenKind::Plain); + + assert_eq!(third.as_input_str(), "there"); + } + + #[test] + fn cr_newline() { + let input = "hello\rthere"; + + let tokens: Vec<_> = dbg!(TokenStream::tokenize(input).collect()); + + assert_eq!(tokens.len(), 3); + + let first = tokens.first().unwrap(); + assert!(first.kind == TokenKind::Plain); + + assert_eq!(first.as_input_str(), "hello"); + + let second = tokens.get(1).unwrap(); + assert!(second.kind == TokenKind::Newline); + + let third = 
tokens.get(2).unwrap(); + assert!(third.kind == TokenKind::Plain); + + assert_eq!(third.as_input_str(), "there"); + } + + #[test] + fn cr_lf_newline() { + let input = "hello\r\nthere"; + + let tokens: Vec<_> = dbg!(TokenStream::tokenize(input).collect()); + + assert_eq!(tokens.len(), 3); + + let first = tokens.first().unwrap(); + assert!(first.kind == TokenKind::Plain); + + assert_eq!(first.as_input_str(), "hello"); + + let second = tokens.get(1).unwrap(); + assert!(dbg!(second.kind) == TokenKind::Newline); + + let third = tokens.get(2).unwrap(); + assert!(third.kind == TokenKind::Plain); + + assert_eq!(third.as_input_str(), "there"); + } + + #[test] + fn headline() { + let input = "## Hello there"; + + let tokens: Vec<_> = dbg!(TokenStream::tokenize(input).collect()); + + assert_eq!(tokens.len(), 5); + + let first = &tokens[0]; + assert_eq!(first.kind, TokenKind::Hash(2)); + assert_eq!(first.as_input_str(), "##"); + + let second = &tokens[1]; + assert_eq!(second.kind, TokenKind::Whitespace); + assert_eq!(second.as_input_str(), " "); + + let third = &tokens[2]; + assert_eq!(third.kind, TokenKind::Plain); + assert_eq!(third.as_input_str(), "Hello"); + + let fourth = &tokens[3]; + assert_eq!(fourth.kind, TokenKind::Whitespace); + assert_eq!(fourth.as_input_str(), " "); + + let fifth = &tokens[4]; + assert_eq!(fifth.kind, TokenKind::Plain); + assert_eq!(fifth.as_input_str(), "there"); + } +} diff --git a/frontend/src/lexer/token.rs b/frontend/src/lexer/token.rs new file mode 100644 index 00000000..d58d9d1e --- /dev/null +++ b/frontend/src/lexer/token.rs @@ -0,0 +1,38 @@ +use crate::span::Span; + +use super::token_kind::TokenKind; + +/// Token lexed from grapheme [`Symbol`]s of the given input. +/// +/// # Lifetimes +/// +/// * `'input` - lifetime of input the [`Token`] was lexed from. +#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token<'input> { + pub input: &'input str, + pub kind: TokenKind, + pub span: Span, +} + +impl std::fmt::Debug for Token<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let start = self.span.offs as usize; + let end = self.span.offs as usize + self.span.len as usize; + + f.debug_struct("Token") + .field("input", &self.input) + .field("output", &self.input[start..end].to_string()) + .field("kind", &self.kind) + .field("offs", &self.span.offs) + .field("len", &self.span.len) + .finish() + } +} + +impl<'input> Token<'input> { + pub fn as_input_str(&self) -> &'input str { + let start = self.span.offs as usize; + let end = self.span.offs as usize + self.span.len as usize; + &self.input[start..end] + } +} diff --git a/frontend/src/lexer/token_kind.rs b/frontend/src/lexer/token_kind.rs new file mode 100644 index 00000000..c5e61cc5 --- /dev/null +++ b/frontend/src/lexer/token_kind.rs @@ -0,0 +1,278 @@ +use crate::symbol::SymbolKind; + +pub const COMMENT_TOKEN_LEN: usize = 2; + +/// The kind of the token found in Unimarkup document. 
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TokenKind { + // Keywords + Star(u32), + Hash(u32), + Minus(u32), + Plus(u32), + Underline(u32), + Caret(u32), + Tick(u32), + Pipe(u32), + Tilde(u32), + Quote(u32), + Dollar(u32), + Colon(u32), + Dot(u32), + Ampersand(u32), + Comma(u32), + + // parenthesis + OpenParenthesis, + CloseParenthesis, + OpenBracket, + CloseBracket, + OpenBrace, + CloseBrace, + + // Spaces + Whitespace, + Newline, + Blankline, + Eoi, + Indentation(u32), + + // Escaped + EscapedPlain, + EscapedWhitespace, + EscapedNewline, + + // Plain + #[default] + Plain, + TerminalPunctuation, + + // Specials + Comment { + // Set to `true` if comment was implicitly closed at end of line + implicit_close: bool, + }, + // ImplicitSubstitution(ImplicitSubstitutionKind), + DirectUri, + + // For matching + Any, + Space, + EnclosedBlockEnd, + PossibleAttributes, + PossibleDecorator, +} + +impl TokenKind { + pub fn is_keyword(&self) -> bool { + !self.is_not_keyword() + } + + pub fn is_not_keyword(&self) -> bool { + matches!( + self, + TokenKind::Whitespace + | TokenKind::Newline + | TokenKind::Blankline + | TokenKind::Eoi + | TokenKind::EscapedPlain + | TokenKind::EscapedWhitespace + | TokenKind::EscapedNewline + | TokenKind::Plain + | TokenKind::TerminalPunctuation + | TokenKind::Comment { .. } + | TokenKind::DirectUri + | TokenKind::Any + | TokenKind::Space + | TokenKind::EnclosedBlockEnd + | TokenKind::PossibleAttributes + | TokenKind::PossibleDecorator + ) + } + + pub fn is_open_parenthesis(&self) -> bool { + matches!( + self, + TokenKind::OpenParenthesis | TokenKind::OpenBracket | TokenKind::OpenBrace + ) + } + + pub fn is_close_parenthesis(&self) -> bool { + matches!( + self, + TokenKind::CloseParenthesis | TokenKind::CloseBracket | TokenKind::CloseBrace + ) + } + + pub fn is_parenthesis(&self) -> bool { + self.is_open_parenthesis() || self.is_close_parenthesis() + } + + pub fn is_space(&self) -> bool { + matches!( + self, + TokenKind::Newline | TokenKind::Whitespace | TokenKind::Eoi | TokenKind::Blankline + ) + } + + pub fn is_plain(&self) -> bool { + matches!(self, TokenKind::Plain | TokenKind::TerminalPunctuation) + } +} + +impl From for String { + fn from(value: TokenKind) -> Self { + match value { + TokenKind::Star(len) => SymbolKind::Star.as_str().repeat(len as _), + TokenKind::Hash(len) => SymbolKind::Hash.as_str().repeat(len as _), + TokenKind::Minus(len) => SymbolKind::Minus.as_str().repeat(len as _), + TokenKind::Plus(len) => SymbolKind::Plus.as_str().repeat(len as _), + TokenKind::Underline(len) => SymbolKind::Underline.as_str().repeat(len as _), + TokenKind::Caret(len) => SymbolKind::Caret.as_str().repeat(len as _), + TokenKind::Tick(len) => SymbolKind::Tick.as_str().repeat(len as _), + TokenKind::Pipe(len) => SymbolKind::Pipe.as_str().repeat(len as _), + TokenKind::Tilde(len) => SymbolKind::Tilde.as_str().repeat(len as _), + TokenKind::Quote(len) => SymbolKind::Quote.as_str().repeat(len as _), + TokenKind::Dollar(len) => SymbolKind::Dollar.as_str().repeat(len as _), + TokenKind::Colon(len) => SymbolKind::Colon.as_str().repeat(len as _), + TokenKind::Dot(len) => SymbolKind::Dot.as_str().repeat(len as _), + TokenKind::Ampersand(len) => SymbolKind::Ampersand.as_str().repeat(len as _), + TokenKind::Comma(len) => SymbolKind::Comma.as_str().repeat(len as _), + TokenKind::OpenParenthesis => { + let mut s = String::with_capacity(SymbolKind::OpenParenthesis.as_str().len()); + s.push_str(SymbolKind::OpenParenthesis.as_str()); + s + } + 
TokenKind::CloseParenthesis => { + let mut s = String::with_capacity(SymbolKind::CloseParenthesis.as_str().len()); + s.push_str(SymbolKind::CloseParenthesis.as_str()); + s + } + TokenKind::OpenBracket => { + let mut s = String::with_capacity(SymbolKind::OpenBracket.as_str().len()); + s.push_str(SymbolKind::OpenBracket.as_str()); + s + } + TokenKind::CloseBracket => { + let mut s = String::with_capacity(SymbolKind::CloseBracket.as_str().len()); + s.push_str(SymbolKind::CloseBracket.as_str()); + s + } + TokenKind::OpenBrace => { + let mut s = String::with_capacity(SymbolKind::OpenBrace.as_str().len()); + s.push_str(SymbolKind::OpenBrace.as_str()); + s + } + TokenKind::CloseBrace => { + let mut s = String::with_capacity(SymbolKind::CloseBrace.as_str().len()); + s.push_str(SymbolKind::CloseBrace.as_str()); + s + } + TokenKind::EscapedNewline | TokenKind::Newline | TokenKind::Blankline => { + // Blankline is also only one newline to handle contiguous blanklines. + let mut s = String::with_capacity(SymbolKind::Newline.as_str().len()); + s.push_str(SymbolKind::Newline.as_str()); + s + } + TokenKind::Whitespace => { + let mut s = String::with_capacity(SymbolKind::Whitespace.as_str().len()); + s.push_str(SymbolKind::Whitespace.as_str()); + s + } + TokenKind::Plain + | TokenKind::TerminalPunctuation + | TokenKind::EscapedPlain + | TokenKind::EscapedWhitespace + // | TokenKind::ImplicitSubstitution(_) + | TokenKind::Comment { .. } + | TokenKind::DirectUri + | TokenKind::PossibleAttributes + | TokenKind::PossibleDecorator + | TokenKind::Any + | TokenKind::EnclosedBlockEnd + | TokenKind::Space + | TokenKind::Eoi => { + #[cfg(debug_assertions)] + panic!( + "Tried to create String from '{:?}', which has undefined String representation.", + value + ); + + #[cfg(not(debug_assertions))] + String::new() + } + TokenKind::Indentation(indent) => " ".repeat(indent as _), + } + } +} + +impl From for TokenKind { + fn from(value: SymbolKind) -> Self { + match value { + SymbolKind::Plain | SymbolKind::Backslash => TokenKind::Plain, // Backslash is incorrect, but will be corrected in iterator + SymbolKind::TerminalPunctuation => TokenKind::TerminalPunctuation, + SymbolKind::Whitespace => TokenKind::Whitespace, + SymbolKind::Newline => TokenKind::Newline, + SymbolKind::Eoi => TokenKind::Eoi, + SymbolKind::Hash => TokenKind::Hash(1), + SymbolKind::Star => TokenKind::Star(1), + SymbolKind::Minus => TokenKind::Minus(1), + SymbolKind::Plus => TokenKind::Plus(1), + SymbolKind::Underline => TokenKind::Underline(1), + SymbolKind::Caret => TokenKind::Caret(1), + SymbolKind::Tick => TokenKind::Tick(1), + SymbolKind::Pipe => TokenKind::Pipe(1), + SymbolKind::Tilde => TokenKind::Tilde(1), + SymbolKind::Quote => TokenKind::Quote(1), + SymbolKind::Dollar => TokenKind::Dollar(1), + SymbolKind::Colon => TokenKind::Colon(1), + SymbolKind::Dot => TokenKind::Colon(1), + SymbolKind::Ampersand => TokenKind::Ampersand(1), + SymbolKind::Comma => TokenKind::Comma(1), + SymbolKind::OpenParenthesis => TokenKind::OpenParenthesis, + SymbolKind::CloseParenthesis => TokenKind::CloseParenthesis, + SymbolKind::OpenBracket => TokenKind::OpenBracket, + SymbolKind::CloseBracket => TokenKind::CloseBracket, + SymbolKind::OpenBrace => TokenKind::OpenBrace, + SymbolKind::CloseBrace => TokenKind::CloseBrace, + SymbolKind::Space => TokenKind::Indentation(1), + } + } +} + +impl From<(SymbolKind, u32)> for TokenKind { + fn from(value: (SymbolKind, u32)) -> Self { + let kind = value.0; + let len = value.1; + + match kind { + SymbolKind::Plain | 
SymbolKind::Backslash => TokenKind::Plain, // Backslash is incorrect, but will be corrected in iterator + SymbolKind::TerminalPunctuation => TokenKind::TerminalPunctuation, + SymbolKind::Whitespace => TokenKind::Whitespace, + SymbolKind::Newline => TokenKind::Newline, + SymbolKind::Eoi => TokenKind::Eoi, + SymbolKind::Hash => TokenKind::Hash(len), + SymbolKind::Star => TokenKind::Star(len), + SymbolKind::Minus => TokenKind::Minus(len), + SymbolKind::Plus => TokenKind::Plus(len), + SymbolKind::Underline => TokenKind::Underline(len), + SymbolKind::Caret => TokenKind::Caret(len), + SymbolKind::Tick => TokenKind::Tick(len), + SymbolKind::Pipe => TokenKind::Pipe(len), + SymbolKind::Tilde => TokenKind::Tilde(len), + SymbolKind::Quote => TokenKind::Quote(len), + SymbolKind::Dollar => TokenKind::Dollar(len), + SymbolKind::Colon => TokenKind::Colon(len), + SymbolKind::Dot => TokenKind::Dot(len), + SymbolKind::Ampersand => TokenKind::Ampersand(len), + SymbolKind::Comma => TokenKind::Comma(len), + SymbolKind::OpenParenthesis => TokenKind::OpenParenthesis, + SymbolKind::CloseParenthesis => TokenKind::CloseParenthesis, + SymbolKind::OpenBracket => TokenKind::OpenBracket, + SymbolKind::CloseBracket => TokenKind::CloseBracket, + SymbolKind::OpenBrace => TokenKind::OpenBrace, + SymbolKind::CloseBrace => TokenKind::CloseBrace, + SymbolKind::Space => TokenKind::Indentation(1), + } + } +} diff --git a/frontend/src/lib.rs b/frontend/src/lib.rs new file mode 100644 index 00000000..0437f0b0 --- /dev/null +++ b/frontend/src/lib.rs @@ -0,0 +1,5 @@ +pub mod lexer; +pub mod parser; +mod scanner; +pub mod span; +mod symbol; diff --git a/frontend/src/parser/block/bulletlist.rs b/frontend/src/parser/block/bulletlist.rs new file mode 100644 index 00000000..fa7ff2db --- /dev/null +++ b/frontend/src/parser/block/bulletlist.rs @@ -0,0 +1,34 @@ +use crate::span::Span; + +/// Enum representing the keyword used to create a [`BulletListEntry`]. +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum BulletListEntryKeyword { + /// Minus keyword: `-` + Minus, + /// Plus keyword: `+` + Plus, + /// Star keyword: `*` + Star, +} + +/// Structure of a Unimarkup bullet list entry. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct BulletListEntry { + /// The [`BulletListEntryKeyword`] used to create this entry. + pub keyword: BulletListEntryKeyword, + /// The entry heading content of this entry. + pub heading: Vec, + /// The body of this entry. + pub body: Vec, + /// The span this element occupies in the Unimarkup input. + pub span: Span, +} + +/// Structure of a Unimarkup bullet list element. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct BulletList { + /// The list entries of this bullet list. + pub entries: Vec, + /// The span this element occupies in the Unimarkup input. + pub span: Span, +} diff --git a/frontend/src/parser/block/heading.rs b/frontend/src/parser/block/heading.rs new file mode 100644 index 00000000..18555e2b --- /dev/null +++ b/frontend/src/parser/block/heading.rs @@ -0,0 +1,72 @@ +use crate::span::Span; + +/// Enum of possible heading levels for unimarkup headings +#[derive(Eq, PartialEq, Debug, strum_macros::Display, strum_macros::EnumString, Clone, Copy)] +#[strum(serialize_all = "kebab-case")] +pub enum HeadingLevel { + /// Heading level 1, corresponds to `# ` in Unimarkup. + #[strum(serialize = "level-1")] + Level1 = 1, // start counting from 0 + + /// Heading level 2, corresponds to `## ` in Unimarkup. + #[strum(serialize = "level-2")] + Level2, + + /// Heading level 3, corresponds to `### ` in Unimarkup. 
+ #[strum(serialize = "level-3")] + Level3, + + /// Heading level 4, corresponds to `#### ` in Unimarkup. + #[strum(serialize = "level-4")] + Level4, + + /// Heading level 5, corresponds to `##### ` in Unimarkup. + #[strum(serialize = "level-5")] + Level5, + + /// Heading level 6, corresponds to `###### ` in Unimarkup. + #[strum(serialize = "level-6")] + Level6, +} + +impl TryFrom for HeadingLevel { + type Error = String; + + fn try_from(value: u32) -> Result { + let level = match value { + 1 => HeadingLevel::Level1, + 2 => HeadingLevel::Level2, + 3 => HeadingLevel::Level3, + 4 => HeadingLevel::Level4, + 5 => HeadingLevel::Level5, + 6 => HeadingLevel::Level6, + other => return Err(format!("Invalid heading level: {other}")), + }; + + Ok(level) + } +} + +impl From for u8 { + fn from(value: HeadingLevel) -> Self { + value as u8 + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Heading { + /// Unique identifier for a heading. + pub id: String, + + /// Heading level. + pub level: HeadingLevel, + + /// The content of the heading line. + pub content: Vec, + + /// Attributes of the heading. + pub attributes: Option, + + /// The span this element occupies in the Unimarkup input. + pub span: Span, +} diff --git a/frontend/src/parser/block/mod.rs b/frontend/src/parser/block/mod.rs new file mode 100644 index 00000000..4b8c27df --- /dev/null +++ b/frontend/src/parser/block/mod.rs @@ -0,0 +1,22 @@ +use crate::span::Span; + +pub mod bulletlist; +pub mod heading; +pub mod paragraph; +pub mod verbatim; + +/// Generic enum for all Unimarkup block elements. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Block { + /// Represents one blankline. + /// Needed in contexts where newlines must be kept. + Blankline(Span), + /// Represents the heading block + Heading(heading::Heading), + /// Represents the paragraph block + Paragraph(paragraph::Paragraph), + /// Represents the verbatim block + Verbatim(verbatim::Verbatim), + /// Represents the bullet list block + BulletList(bulletlist::BulletList), +} diff --git a/frontend/src/parser/block/paragraph.rs b/frontend/src/parser/block/paragraph.rs new file mode 100644 index 00000000..6843eee6 --- /dev/null +++ b/frontend/src/parser/block/paragraph.rs @@ -0,0 +1,11 @@ +use crate::span::Span; + +/// Structure of a Unimarkup paragraph element. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct Paragraph { + /// The content of the paragraph. + pub content: Vec, + + /// The span this element occupies in the Unimarkup input. + pub span: Span, +} diff --git a/frontend/src/parser/block/verbatim.rs b/frontend/src/parser/block/verbatim.rs new file mode 100644 index 00000000..00be1ad7 --- /dev/null +++ b/frontend/src/parser/block/verbatim.rs @@ -0,0 +1,19 @@ +use crate::span::Span; + +/// Structure of a Unimarkup verbatim block element. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Verbatim { + /// The content of the verbatim block. + pub content: String, + /// The language used to highlight the content. + pub data_lang: Option, + /// Attributes of the verbatim block. + // TODO: make attributes data structure + pub attributes: Option, + /// Marks that this verbatim block was implicitly closed. + pub implicit_closed: bool, + /// The number of backticks this verbatim block was created with. + pub tick_len: usize, + /// The span this element occupies in the Unimarkup input. 
+ pub span: Span, +} diff --git a/frontend/src/parser/mod.rs b/frontend/src/parser/mod.rs new file mode 100644 index 00000000..a8b2f59e --- /dev/null +++ b/frontend/src/parser/mod.rs @@ -0,0 +1,208 @@ +use crate::{ + lexer::{token::Token, token_kind::TokenKind, TokenStream}, + span::Span, +}; + +pub mod block; + +use block::{ + heading::{Heading, HeadingLevel}, + paragraph::Paragraph, + Block, +}; +use ribbon::{Enroll, Ribbon, Tape}; + +pub struct Parser<'input> { + /// Iterator that returns tokens found in the Unimarkup input. + tokens: Tape>, +} + +pub fn parse(input: &str) -> Parser<'_> { + Parser { + tokens: TokenStream::tokenize(input).tape(), + } +} + +impl Iterator for Parser<'_> { + type Item = Block; + + fn next(&mut self) -> Option { + loop { + let token = self.tokens.next()?; + + // make sure we can peek next token. + self.tokens.expand(); + + match token.kind { + TokenKind::Hash(count) => { + self.tokens.expand(); + + let is_next_whitespace = self + .tokens + .peek_front() + .map(|token| token.kind == TokenKind::Whitespace) + .unwrap_or(false); + + match HeadingLevel::try_from(count) { + Ok(level) if is_next_whitespace => { + return self.parse_heading(level).map(Block::Heading); + } + _ => return self.parse_paragraph(Some(token)), + } + } + + TokenKind::Blankline => { + continue; + } + + _other => return self.parse_paragraph(Some(token)), + // TokenKind::Star(_) => todo!(), + // TokenKind::Minus(_) => todo!(), + // TokenKind::Plus(_) => todo!(), + // TokenKind::Underline(_) => todo!(), + // TokenKind::Caret(_) => todo!(), + // TokenKind::Tick(_) => todo!(), + // TokenKind::Pipe(_) => todo!(), + // TokenKind::Tilde(_) => todo!(), + // TokenKind::Quote(_) => todo!(), + // TokenKind::Dollar(_) => todo!(), + // TokenKind::Colon(_) => todo!(), + // TokenKind::Dot(_) => todo!(), + // TokenKind::Ampersand(_) => todo!(), + // TokenKind::Comma(_) => todo!(), + // TokenKind::OpenParenthesis => todo!(), + // TokenKind::CloseParenthesis => todo!(), + // TokenKind::OpenBracket => todo!(), + // TokenKind::CloseBracket => todo!(), + // TokenKind::OpenBrace => todo!(), + // TokenKind::CloseBrace => todo!(), + // TokenKind::Whitespace => todo!(), + // TokenKind::Newline => todo!(), + // TokenKind::Blankline => todo!(), + // TokenKind::Eoi => todo!(), + // TokenKind::Indentation(_) => todo!(), + // TokenKind::EscapedPlain => todo!(), + // TokenKind::EscapedWhitespace => todo!(), + // TokenKind::EscapedNewline => todo!(), + // TokenKind::Plain => todo!(), + // TokenKind::TerminalPunctuation => todo!(), + // TokenKind::Comment { implicit_close } => todo!(), + // TokenKind::DirectUri => todo!(), + // TokenKind::Any => todo!(), + // TokenKind::Space => todo!(), + // TokenKind::EnclosedBlockEnd => todo!(), + // TokenKind::PossibleAttributes => todo!(), + // TokenKind::PossibleDecorator => todo!(), + } + } + } +} + +impl Parser<'_> { + fn parse_heading(&mut self, level: HeadingLevel) -> Option { + let expected_indentation = (u8::from(level) + 1) as u32; + + self.tokens.expand_while(|token| match token.kind { + TokenKind::Indentation(indent_level) => indent_level == expected_indentation, + TokenKind::Blankline => false, + _other => true, + }); + + let mut content = String::with_capacity(self.tokens.len()); + let mut span: Option = None; + + let mut is_start_of_line = true; + + while let Some(token) = self.tokens.pop_front() { + if let Some(span) = span.as_mut() { + span.len += token.span.len; + } else { + span = Some(token.span); + } + + match token.kind { + TokenKind::Whitespace | TokenKind::Space | 
TokenKind::Indentation(_) + if is_start_of_line => + { + continue; + } + _ => content += token.as_input_str(), + } + + is_start_of_line = matches!(token.kind, TokenKind::Newline | TokenKind::Blankline) + } + + let span = span?; + + Some(Heading { + id: String::from("placeholder-id"), + level, + content: vec![content], + attributes: None, + span, + }) + } + + fn parse_paragraph(&mut self, first_token: Option>) -> Option { + self.tokens + .expand_while(|token| token.kind != TokenKind::Blankline); + + let tape_len = self.tokens.len(); + let tape_iter = std::iter::from_fn(|| self.tokens.pop_front()); + + let mut content = String::with_capacity(tape_len); + let mut span: Option = None; + + for token in std::iter::once(first_token).flatten().chain(tape_iter) { + if let Some(span) = &mut span { + span.len += token.span.len; + } else { + span = Some(token.span); + } + + content += token.as_input_str(); + } + + let span = span?; + + Some(Block::Paragraph(Paragraph { + content: vec![content], + span, + })) + } +} + +#[cfg(test)] +mod tests { + use crate::parser::block::{heading::HeadingLevel, Block}; + + use super::parse; + + #[test] + fn parse_heading() { + let input = "## hello there!"; + + let heading_block = parse(input) + .next() + .expect("Should correctly parse heading!"); + + let Block::Heading(heading) = heading_block else { + panic!("Should correctly parse heading."); + }; + + assert_eq!(heading.level, HeadingLevel::Level2); + } + + #[test] + fn invalid_heading() { + let input = "##hello there!"; + + let block = parse(input) + .next() + .expect("Should correctly parse heading!"); + + let Block::Paragraph(_paragraph) = block else { + panic!("Should correctly parse heading."); + }; + } +} diff --git a/frontend/src/scanner.rs b/frontend/src/scanner.rs new file mode 100644 index 00000000..793176be --- /dev/null +++ b/frontend/src/scanner.rs @@ -0,0 +1,64 @@ +use std::{iter::Peekable, str::Bytes}; + +use super::span::Span as SymPos; +use crate::symbol::{Symbol, SymbolKind}; + +/// Iterator of Unimarkup [`Symbol`]s over a given input. +pub(crate) struct SymbolStream<'input> { + /// The input from which the Unimarkup symbols are to be scanned. + input: &'input str, + + /// The bytes representation of the input. + bytes: Peekable>, + + /// Byte offset into the `self.input`. Input can't be larger than `2^32 B = 4 GB` + curr_offs: u32, +} + +impl<'input> SymbolStream<'input> { + pub fn scan_str(input: &'input str) -> Self { + let bytes = input.bytes().peekable(); + + // make sure the input does not exceed the maximum size. + debug_assert!(bytes.len() < (2usize.pow(32))); + + Self { + input, + bytes, + curr_offs: 0, + } + } +} + +impl<'input> Iterator for SymbolStream<'input> { + type Item = Symbol<'input>; + + fn next(&mut self) -> Option { + let next_byte = self.bytes.next()?; + + let kind = SymbolKind::from(next_byte); + + let byte_len = match (kind, next_byte, self.bytes.peek()) { + (SymbolKind::Newline, b'\r', Some(b'\n')) => { + // "\r\n" is split into '\r' and '\n', so we can check if the char was '\r' and if + // so, we can consume '\n' as well. + self.bytes.next(); + 2 + } + _ => 1, + }; + + let prev_offs = self.curr_offs; + + self.curr_offs += byte_len; + + Some(Symbol { + input: self.input, + kind, + span: SymPos { + offs: prev_offs, + len: byte_len, + }, + }) + } +} diff --git a/frontend/src/span.rs b/frontend/src/span.rs new file mode 100644 index 00000000..c30700a1 --- /dev/null +++ b/frontend/src/span.rs @@ -0,0 +1,70 @@ +//! 
Utilities for tracking the positional information about symbols, tokens and other elements in +//! original input. + +use std::ops::{Add, AddAssign, Sub, SubAssign}; + +/// Indicates position of a symbol or token in a Unimarkup document. Counting of both byte and code +/// point offsets starts at zero. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Span { + /// Byte offset into the input where the symbol was found in. Note that input can't be larger + /// than `2^32 B = 4 GB` + pub offs: u32, + + // TODO: `len` and `cp_count` can be calculated if we have two consecutive symbols, so maybe we + // don't need to store them at all times? We would need 4 bytes less per symbol in that case. + /// Length of the [`Span`] in bytes. + pub len: u32, +} + +impl AddAssign for Span { + fn add_assign(&mut self, rhs: Self) { + self.offs += rhs.offs; + self.len += rhs.len; + } +} + +impl AddAssign<(u32, u32)> for Span { + fn add_assign(&mut self, (offs, len): (u32, u32)) { + self.offs += offs; + self.len += len; + } +} + +impl Add for Span +where + Span: AddAssign, +{ + type Output = Span; + + fn add(mut self, rhs: T) -> Self::Output { + self += rhs; + self + } +} + +impl SubAssign for Span { + fn sub_assign(&mut self, rhs: Self) { + self.offs -= rhs.offs; + self.len -= rhs.len; + } +} + +impl SubAssign<(u32, u32)> for Span { + fn sub_assign(&mut self, (offs, len): (u32, u32)) { + self.offs -= offs; + self.len -= len; + } +} + +impl Sub for Span +where + Span: SubAssign, +{ + type Output = Span; + + fn sub(mut self, rhs: T) -> Self::Output { + self -= rhs; + self + } +} diff --git a/frontend/src/symbol/iterator.rs b/frontend/src/symbol/iterator.rs new file mode 100644 index 00000000..1ef3855e --- /dev/null +++ b/frontend/src/symbol/iterator.rs @@ -0,0 +1,109 @@ +use itertools::PeekingNext; + +use crate::lexer::{Symbol, SymbolKind}; + +#[derive(Debug, Clone)] +pub struct SymbolIterator<'slice, 'input> { + /// The [`Symbol`] slice the iterator was created for. + symbols: &'slice [Symbol<'input>], + /// The current index of the iterator inside the [`Symbol`] slice. + pub(super) index: usize, + /// The peek index of the iterator inside the [`Symbol`] slice. + pub(super) peek_index: usize, +} + +impl<'slice, 'input, T> From for SymbolIterator<'slice, 'input> +where + T: Into<&'slice [Symbol<'input>]>, +{ + fn from(value: T) -> Self { + SymbolIterator { + symbols: value.into(), + index: 0, + peek_index: 0, + } + } +} + +impl<'slice, 'input> Iterator for SymbolIterator<'slice, 'input> { + type Item = &'slice Symbol<'input>; + + fn next(&mut self) -> Option { + let symbol = self.symbols.get(self.index)?; + + self.index += 1; + self.peek_index = self.index; + + Some(symbol) + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.max_len())) + } +} + +impl<'slice, 'input> PeekingNext for SymbolIterator<'slice, 'input> { + fn peeking_next(&mut self, accept: F) -> Option + where + Self: Sized, + F: FnOnce(&Self::Item) -> bool, + { + let symbol = self.symbols.get(self.peek_index).filter(accept)?; + self.peek_index += 1; + Some(symbol) + } +} + +impl<'slice, 'input> SymbolIterator<'slice, 'input> { + /// Returns the maximum length of the remaining [`Symbol`]s this iterator might return. + /// + /// **Note:** This length does not consider parent iterators, or matching functions. + /// Therefore, the returned number of [`Symbol`]s might differ, but cannot be larger than this length. 
+ pub fn max_len(&self) -> usize { + self.symbols.len().saturating_sub(self.index) + } + + /// Returns `true` if no more [`Symbol`]s are available. + pub fn is_empty(&self) -> bool { + self.max_len() == 0 + } + + /// Returns the current index this iterator is in the [`Symbol`] slice of the root iterator. + pub fn index(&self) -> usize { + self.index + } + + /// Sets the current index of this iterator to the given index. + pub(crate) fn set_index(&mut self, index: usize) { + debug_assert!(self.index <= index, "Tried to move the iterator backward."); + + self.index = index; + self.peek_index = index; + } + + /// Returns the index used to peek. + pub(crate) fn peek_index(&self) -> usize { + self.peek_index + } + + /// Sets the peek index of this iterator to the given index. + pub(crate) fn set_peek_index(&mut self, index: usize) { + if self.index() <= index { + self.peek_index = index; + } + } + + pub fn reset_peek(&mut self) { + self.set_peek_index(self.index()); + } + + /// Returns the next [`Symbol`] without changing the current index. + pub fn peek(&mut self) -> Option<&'slice Symbol<'input>> { + self.symbols.get(self.peek_index) + } + + /// Returns the [`SymbolKind`] of the peeked [`Symbol`]. + pub fn peek_kind(&mut self) -> Option { + self.peek().map(|s| s.kind) + } +} diff --git a/frontend/src/symbol/mod.rs b/frontend/src/symbol/mod.rs new file mode 100644 index 00000000..21eff818 --- /dev/null +++ b/frontend/src/symbol/mod.rs @@ -0,0 +1,304 @@ +//! Symbol and helper types for structurization of Unimarkup input. + +use core::{fmt, str}; + +use super::span::Span; + +// pub mod iterator; + +// TODO: add more terminal punctuation symbols +pub const TERMINAL_PUNCTUATION: &[u8] = b".?!"; + +/// Possible kinds of Symbol found in Unimarkup document. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum SymbolKind { + /// Regular text with no semantic meaning + #[default] + Plain, + /// Unicode terminal punctuation + TerminalPunctuation, + /// Any non-linebreaking whitespace + Whitespace, + /// Regular spaces, not tabs or other whitespace + Space, + /// A line break literal (for example `\n` or '\r\n') + Newline, + /// End of Unimarkup document + Eoi, + /// The backslash (`\`) is used for escaping other symbols. + Backslash, + /// Hash symbol (#) used for headings + Hash, + /// The star (`*`) literal is used for various elements. + Star, + /// The minus (`-`) literal is used for various elements. + Minus, + /// The plus (`+`) literal is used for various elements. + Plus, + /// The underline (`_`) literal is used for underline and/or subscript formatting. + Underline, + /// The caret (`^`) literal is used for superscript formatting. + Caret, + /// The tick (`` ` ``) literal is used for verbatim blocks and formatting. + Tick, + /// The pipe (`|`) literal is used for highlight formatting. + Pipe, + /// The tilde (`~`) literal is used for strikethrough formatting. + Tilde, + /// The quote (`"`) literal is used for quotation formatting. + Quote, + /// The dollar (`$`) literal is used for math mode formatting. + Dollar, + /// A colon literal (`:`) is used as marker (e.g. for alias substitutions `::heart::`). + Colon, + /// A dot literal (`.`). + Dot, + /// An ampersand literal (`&`) + Ampersand, + /// A comma literal (`,`) + Comma, + /// The open parentheses (`(`) literal is used for additional data to text group elements (e.g. + /// image insert). + OpenParenthesis, + /// The close parentheses (`)`) literal is used to close the additional data to text group. 
+ CloseParenthesis, + /// The open bracket (`[`) literal is used for text group elements. + OpenBracket, + /// The close bracket (`]`) literal is used for text group elements. + CloseBracket, + /// The open brace (`{`) literal is used for inline attributes. + OpenBrace, + /// The close brace (`}`) literal is used for inline attributes. + CloseBrace, +} + +impl SymbolKind { + pub fn is_not_keyword(&self) -> bool { + matches!( + self, + SymbolKind::Newline | SymbolKind::Whitespace | SymbolKind::Plain | SymbolKind::Eoi + ) + } + + pub fn is_keyword(&self) -> bool { + !self.is_not_keyword() + } + + pub fn is_open_parenthesis(&self) -> bool { + matches!( + self, + SymbolKind::OpenParenthesis | SymbolKind::OpenBracket | SymbolKind::OpenBrace + ) + } + + pub fn is_close_parenthesis(&self) -> bool { + matches!( + self, + SymbolKind::CloseParenthesis | SymbolKind::CloseBracket | SymbolKind::CloseBrace + ) + } + + pub fn is_parenthesis(&self) -> bool { + self.is_open_parenthesis() || self.is_close_parenthesis() + } + + pub fn is_space(&self) -> bool { + matches!( + self, + SymbolKind::Newline | SymbolKind::Whitespace | SymbolKind::Eoi + ) + } +} + +/// Symbol representation of literals found in Unimarkup document. +#[derive(Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Symbol<'a> { + /// Original input the symbol is found in. + pub input: &'a str, + /// Kind of the symbol, e.g. a hash (#) + pub kind: SymbolKind, + + pub span: Span, +} + +impl fmt::Debug for Symbol<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let input = if self.input.len() < 100 { + String::from(self.input) + } else { + format!("{}...", &self.input[0..100]) + }; + + let output = { + let start = self.span.offs as usize; + let end = self.span.offs as usize + self.span.len as usize; + &self.input[start..end] + }; + + f.debug_struct("Symbol") + .field("input", &input) + .field("output", &output) + .field("kind", &self.kind) + .field("offs", &self.span.offs) + .field("len", &self.span.len) + .finish() + } +} + +impl Symbol<'_> { + // TODO: extension trait in core? + #[allow(dead_code)] + pub fn is_not_keyword(&self) -> bool { + self.kind.is_not_keyword() + } + + /// Returns the original string representation of the symbol. + #[allow(dead_code)] + pub fn as_str(&self) -> &str { + let start = self.span.offs as usize; + let end = self.span.offs as usize + self.span.len as usize; + + match self.kind { + SymbolKind::Plain | SymbolKind::Whitespace => &self.input[start..end], + _ => self.kind.as_str(), + } + } + + /// Flattens the input of consecutive symbols. Returns the slice of input starting from start + /// position of first symbol until the end of last symbol. Returns [`None`] if slice is empty. + /// + /// # Panics + /// + /// It's assumed that all [`Symbol`]s in slice reference the same input. If not, the function + /// might panic (guaranteed in debug) if inputs are not the same and last [`Symbol`] in slice + /// references input that is longer than the one referenced in the first [`Symbol`]. + #[allow(dead_code)] + pub fn flatten(symbols: &[Self]) -> Option<&str> { + let (first, last) = (symbols.first()?, symbols.last()?); + + debug_assert_eq!(first.input, last.input); + + let input = first.input; + + let start = first.span.offs as usize; + let end = last.span.offs as usize + last.span.len as usize; + + Some(&input[start..end]) + } + + /// Flattens the iterator of consecutive symbols. Returns the slice of input starting from start + /// position of first symbol until the end of last symbol. 
+ /// + /// It is assumed (and checked in debug release) that the symbols are in contiguous order. + /// + /// Returns `None` if the referenced input is not same in all symbols. + #[allow(dead_code)] + pub fn flatten_iter<'s>(mut iter: impl Iterator>) -> Option<&'s str> { + let first = iter.next()?; + + #[cfg(debug_assertions)] + let last = std::iter::once(first).chain(iter).reduce(|prev, curr| { + debug_assert!(prev.span.offs + prev.span.len == curr.span.offs); + curr + })?; + + #[cfg(not(debug_assertions))] + let last = iter.last().unwrap_or(first); + + let input = first.input; + + let start = first.span.offs as usize; + let end = last.span.offs as usize + last.span.len as usize; + + Some(&input[start..end]) + } + + pub fn len(&self) -> u32 { + self.span.len + } +} + +impl From for SymbolKind { + fn from(value: u8) -> Self { + match value { + b'#' => SymbolKind::Hash, + b'\n' | b'\r' => SymbolKind::Newline, + b'`' => SymbolKind::Tick, + b'\\' => SymbolKind::Backslash, + b'*' => SymbolKind::Star, + b'-' => SymbolKind::Minus, + b'+' => SymbolKind::Plus, + b'_' => SymbolKind::Underline, + b'^' => SymbolKind::Caret, + b'|' => SymbolKind::Pipe, + b'~' => SymbolKind::Tilde, + b'\'' => SymbolKind::Quote, + b'$' => SymbolKind::Dollar, + b'(' => SymbolKind::OpenParenthesis, + b')' => SymbolKind::CloseParenthesis, + b'[' => SymbolKind::OpenBracket, + b']' => SymbolKind::CloseBracket, + b'{' => SymbolKind::OpenBrace, + b'}' => SymbolKind::CloseBrace, + b':' => SymbolKind::Colon, + b'.' => SymbolKind::Dot, + b'&' => SymbolKind::Ampersand, + b',' => SymbolKind::Comma, + b' ' => SymbolKind::Space, + symbol if symbol != b'\n' && symbol != b'\r' && char::from(value).is_whitespace() => { + SymbolKind::Whitespace + } + _ => { + let mut kind = SymbolKind::Plain; + + if TERMINAL_PUNCTUATION.contains(&value) { + kind = SymbolKind::TerminalPunctuation; + } + + kind + } + } + } +} + +impl SymbolKind { + pub fn as_str(&self) -> &str { + match self { + SymbolKind::Plain | SymbolKind::TerminalPunctuation => { + #[cfg(debug_assertions)] + panic!( + "Tried to create &str from '{self:?}', which has undefined &str representation." 
+ ); + + #[cfg(not(debug_assertions))] + "" + } + SymbolKind::Hash => "#", + SymbolKind::Tick => "`", + SymbolKind::Whitespace => " ", + SymbolKind::Newline => "\n", + SymbolKind::Eoi => "", + SymbolKind::Backslash => "\\", + SymbolKind::Star => "*", + SymbolKind::Minus => "-", + SymbolKind::Plus => "+", + SymbolKind::Underline => "_", + SymbolKind::Caret => "^", + SymbolKind::Pipe => "|", + SymbolKind::Tilde => "~", + SymbolKind::Quote => "\"", + SymbolKind::Dollar => "$", + SymbolKind::OpenParenthesis => "(", + SymbolKind::CloseParenthesis => ")", + SymbolKind::OpenBracket => "[", + SymbolKind::CloseBracket => "]", + SymbolKind::OpenBrace => "{", + SymbolKind::CloseBrace => "}", + SymbolKind::Colon => ":", + SymbolKind::Dot => ".", + SymbolKind::Ampersand => "&", + SymbolKind::Comma => ",", + SymbolKind::Space => " ", + } + } +} diff --git a/frontend/tests/lexer/mod.rs b/frontend/tests/lexer/mod.rs new file mode 100644 index 00000000..88fdc676 --- /dev/null +++ b/frontend/tests/lexer/mod.rs @@ -0,0 +1,59 @@ +use std::{fmt::Write, panic}; + +use libtest_mimic::Trial; +use unimarkup_commons::test_runner::{ + self, as_snapshot::AsSnapshot, snap_test_runner::SnapTestRunner, +}; + +use crate::snapshot::Snapshot; + +mod snapshot; + +pub(crate) fn collect_snapshot_tests() -> Vec { + let tests_path = unimarkup_commons::crate_tests_path!(); + let test_cases = test_runner::collect_tests( + tests_path.join("spec/markup"), + tests_path.join("spec/snapshots/lexer"), + "markup", + ); + + let mut test_runs = Vec::with_capacity(test_cases.len()); + + for case in test_cases { + let snap_test_name = format!("{}::snap::{}", module_path!(), case.test.name.as_str()); + + let snap_test_run = move || { + panic::catch_unwind(|| run_snap_test(case)).map_err(|err| { + let panic_msg = err + .downcast_ref::<&str>() + .unwrap_or(&"Panic message not available"); + + format!("Test case panicked: {}", panic_msg).into() + }) + }; + + test_runs.push(Trial::test(snap_test_name, snap_test_run)); + } + + test_runs +} + +fn run_snap_test(case: test_runner::test_file::TestCase) { + let runner = SnapTestRunner::with_fn(&case.test.name, &case.test.input, |input_str| { + let token_stream = unimarkup_frontend::lexer::TokenStream::tokenize(input_str); + + let token_snaps = token_stream + .map(Snapshot) + .fold(String::new(), |mut agg, snap| { + let _ = writeln!(&mut agg, "{}", snap.as_snapshot()); + agg + }); + format!("{input_str}\n{token_snaps}") + }) + .with_info(format!( + "Test '{}' from '{}'", + case.test.name, case.file_path + )); + + unimarkup_commons::run_snap_test!(runner, &case.out_path); +} diff --git a/frontend/tests/lexer/snapshot.rs b/frontend/tests/lexer/snapshot.rs new file mode 100644 index 00000000..e18ba952 --- /dev/null +++ b/frontend/tests/lexer/snapshot.rs @@ -0,0 +1,88 @@ +use std::fmt::Write; + +use unimarkup_commons::test_runner::as_snapshot::AsSnapshot; +use unimarkup_frontend::lexer::{token::Token, token_kind::TokenKind}; + +use crate::snapshot::Snapshot; + +impl AsSnapshot for Snapshot> { + fn as_snapshot(&self) -> String { + let token = self.0; + + let indent_len = crate::get_indent(token.input, token.span.offs); + + let mut orig_input = token.as_input_str(); + + if orig_input == "\n" { + orig_input = "␊"; + } else if orig_input == "\r\n" { + orig_input = "␍"; + } + + let marker = "^".repeat(token.span.len as usize); + let indent = " ".repeat(indent_len); + let kind = Snapshot(token.kind).as_snapshot(); + + let mut output = String::new(); + let _ = writeln!(&mut output, "{indent}{orig_input}"); + let 
_ = write!( + &mut output, + "{indent}{marker} - {kind} @ ({} -> {})", + token.span.offs, + token.span.offs + token.span.len + ); + output + } +} + +impl AsSnapshot for Snapshot { + fn as_snapshot(&self) -> String { + #[allow(clippy::useless_format)] + match self.0 { + TokenKind::Star(r) => format!("Star({r})"), + TokenKind::Hash(r) => format!("Hash({r})"), + TokenKind::Minus(r) => format!("Minus({r})"), + TokenKind::Plus(r) => format!("Plus({r})"), + TokenKind::Underline(r) => format!("Underline({r})"), + TokenKind::Caret(r) => format!("Caret({r})"), + TokenKind::Tick(r) => format!("Tick({r})"), + TokenKind::Pipe(r) => format!("Pipe({r})"), + TokenKind::Tilde(r) => format!("Tilde({r})"), + TokenKind::Quote(r) => format!("Quote({r})"), + TokenKind::Dollar(r) => format!("Dollar({r})"), + TokenKind::Colon(r) => format!("Colon({r})"), + TokenKind::Dot(r) => format!("Dot({r})"), + TokenKind::Ampersand(r) => format!("Ampersand({r})"), + TokenKind::Comma(r) => format!("Comma({r})"), + TokenKind::OpenParenthesis => format!("OpenParenthesis"), + TokenKind::CloseParenthesis => format!("CloseParenthesis"), + TokenKind::OpenBracket => format!("OpenBracket"), + TokenKind::CloseBracket => format!("CloseBracket"), + TokenKind::OpenBrace => format!("OpenBrace"), + TokenKind::CloseBrace => format!("CloseBrace"), + TokenKind::Whitespace => format!("Whitespace"), + TokenKind::Newline => format!("Newline"), + TokenKind::Blankline => format!("Blankline"), + TokenKind::Eoi => format!("Eoi"), + TokenKind::Indentation(r) => format!("Indentation({r})"), + TokenKind::EscapedPlain => format!("EscapedPlain"), + TokenKind::EscapedWhitespace => format!("EscapedWhitespace"), + TokenKind::EscapedNewline => format!("EscapedNewline"), + TokenKind::Plain => format!("Plain"), + TokenKind::TerminalPunctuation => format!("TerminalPunctuation"), + TokenKind::Comment { implicit_close } => { + if implicit_close { + format!("Comment(implicitly closed)") + } else { + format!("Comment") + } + } + TokenKind::DirectUri => format!("DirectUri"), + TokenKind::Any => format!("Any"), + TokenKind::Space => format!("Space"), + TokenKind::EnclosedBlockEnd => format!("EnclosedBlockEnd"), + TokenKind::PossibleAttributes => format!("PossibleAttributes"), + TokenKind::PossibleDecorator => format!("PossibleDecorator"), + } + } +} diff --git a/frontend/tests/parser/mod.rs b/frontend/tests/parser/mod.rs new file mode 100644 index 00000000..dfcc7fee --- /dev/null +++ b/frontend/tests/parser/mod.rs @@ -0,0 +1,60 @@ +use std::{fmt::Write, panic}; + +use libtest_mimic::Trial; +use unimarkup_commons::test_runner::{ + self, as_snapshot::AsSnapshot, snap_test_runner::SnapTestRunner, +}; + +use crate::snapshot::Snapshot; + +mod snapshot; + +pub(crate) fn collect_snapshot_tests() -> Vec { + let tests_path = unimarkup_commons::crate_tests_path!(); + let test_cases = test_runner::collect_tests( + tests_path.join("spec/markup"), + tests_path.join("spec/snapshots/parser"), + "markup", + ); + + let mut test_runs = Vec::with_capacity(test_cases.len()); + + for case in test_cases { + let snap_test_name = format!("{}::snap::{}", module_path!(), case.test.name.as_str()); + + let snap_test_run = move || { + panic::catch_unwind(|| run_snap_test(case)).map_err(|err| { + let panic_msg = err + .downcast_ref::<&str>() + .unwrap_or(&"Panic message not available"); + + format!("Test case panicked: {}", panic_msg).into() + }) + }; + + test_runs.push(Trial::test(snap_test_name, snap_test_run)); + } + + test_runs +} + +fn run_snap_test(case: test_runner::test_file::TestCase) { 
+    let runner = SnapTestRunner::with_fn(&case.test.name, &case.test.input, |input_str| {
+        let block_stream = unimarkup_frontend::parser::parse(input_str);
+
+        let block_snaps = block_stream
+            .map(|block| Snapshot((input_str.as_str(), block)))
+            .fold(String::new(), |mut agg, snap| {
+                let _ = writeln!(&mut agg, "{}", snap.as_snapshot());
+                agg
+            });
+
+        block_snaps
+    })
+    .with_info(format!(
+        "Test '{}' from '{}'",
+        case.test.name, case.file_path
+    ));
+
+    unimarkup_commons::run_snap_test!(runner, &case.out_path);
+}
diff --git a/frontend/tests/parser/snapshot.rs b/frontend/tests/parser/snapshot.rs
new file mode 100644
index 00000000..6b73a15e
--- /dev/null
+++ b/frontend/tests/parser/snapshot.rs
@@ -0,0 +1,83 @@
+use std::fmt::Write;
+
+use unimarkup_commons::test_runner::as_snapshot::AsSnapshot;
+use unimarkup_frontend::parser::block::{
+    bulletlist::BulletList, heading::Heading, paragraph::Paragraph, verbatim::Verbatim, Block,
+};
+
+use crate::snapshot::Snapshot;
+
+impl AsSnapshot for Snapshot<(&str, Block)> {
+    fn as_snapshot(&self) -> String {
+        let (input, block) = &self.0;
+
+        match block {
+            Block::Blankline(span) => {
+                format!("Blankline @ ({} -> {})", span.offs, span.offs + span.len)
+            }
+            Block::Heading(heading) => Snapshot((*input, heading)).as_snapshot(),
+            Block::Paragraph(paragraph) => Snapshot((*input, paragraph)).as_snapshot(),
+            Block::Verbatim(verbatim) => Snapshot((*input, verbatim)).as_snapshot(),
+            Block::BulletList(bullet_list) => Snapshot((*input, bullet_list)).as_snapshot(),
+        }
+    }
+}
+
+impl AsSnapshot for Snapshot<(&str, &Heading)> {
+    fn as_snapshot(&self) -> String {
+        let (_, heading) = self.0;
+
+        let mut output = String::with_capacity(heading.content.iter().map(|c| c.len()).sum());
+
+        let _ = writeln!(
+            &mut output,
+            "Heading({}) @ ({} -> {}) {{",
+            heading.level,
+            heading.span.offs,
+            heading.span.offs + heading.span.len
+        );
+
+        for line in heading.content.iter().flat_map(|s| s.lines()) {
+            let _ = writeln!(&mut output, " {line}");
+        }
+
+        let _ = writeln!(&mut output, "}}");
+
+        output
+    }
+}
+
+impl AsSnapshot for Snapshot<(&str, &Paragraph)> {
+    fn as_snapshot(&self) -> String {
+        let (_, paragraph) = self.0;
+
+        let mut output = String::with_capacity(paragraph.content.iter().map(|s| s.len()).sum());
+
+        let _ = writeln!(
+            &mut output,
+            "Paragraph @ ({} -> {}) {{",
+            paragraph.span.offs,
+            paragraph.span.offs + paragraph.span.len
+        );
+
+        for line in paragraph.content.iter().flat_map(|s| s.lines()) {
+            let _ = writeln!(&mut output, " {line}");
+        }
+
+        let _ = writeln!(&mut output, "}}");
+
+        output
+    }
+}
+
+impl AsSnapshot for Snapshot<(&str, &Verbatim)> {
+    fn as_snapshot(&self) -> String {
+        todo!()
+    }
+}
+
+impl AsSnapshot for Snapshot<(&str, &BulletList)> {
+    fn as_snapshot(&self) -> String {
+        todo!()
+    }
+}
diff --git a/frontend/tests/snapshot/mod.rs b/frontend/tests/snapshot/mod.rs
new file mode 100644
index 00000000..341a55d8
--- /dev/null
+++ b/frontend/tests/snapshot/mod.rs
@@ -0,0 +1 @@
+pub struct Snapshot<T>(pub T);
diff --git a/frontend/tests/snapshots.rs b/frontend/tests/snapshots.rs
new file mode 100644
index 00000000..d5732f4d
--- /dev/null
+++ b/frontend/tests/snapshots.rs
@@ -0,0 +1,28 @@
+mod lexer;
+mod parser;
+mod snapshot;
+
+use libtest_mimic::Arguments;
+
+// pub(crate) use snapshot::*;
+
+fn get_indent(input: &str, offs: u32) -> usize {
+    input[0..offs as usize]
+        .bytes()
+        .rev()
+        .position(|byte| byte == b'\n')
+        .unwrap_or(offs as usize)
+}
+
+fn main() {
+    let args = Arguments::from_args();
+    let lexer_tests = lexer::collect_snapshot_tests();
+    let parser_tests = parser::collect_snapshot_tests();
+
+    let tests = lexer_tests
+        .into_iter()
+        .chain(parser_tests)
+        .collect::<Vec<_>>();
+
+    libtest_mimic::run(&args, tests).exit();
+}
diff --git a/frontend/tests/spec/markup/block/heading.yml b/frontend/tests/spec/markup/block/heading.yml
new file mode 100644
index 00000000..c1a9bff6
--- /dev/null
+++ b/frontend/tests/spec/markup/block/heading.yml
@@ -0,0 +1,48 @@
+# Unimarkup specification version
+spec: "0.0.1"
+
+name: heading-block
+description: Test parsing of heading.
+
+tests:
+  - name: single-line-level-1
+    description: |
+      Single line level 1 heading.
+
+    input: |
+      # This is a simple heading.
+
+    html: |

This is a simple heading.

+ + - name: multi-line-level-1 + description: | + Heading over multiple lines. + + input: | + # This is a heading + in two lines. + + html: | +

This is a heading in two lines.

+ + - name: single-line-level-6 + description: | + Level 6 heading in a single line. + + input: | + ###### This is a heading. + + html: | +

This is a heading.

+ + - name: multi-line-level-6 + description: | + Level 6 heading in multiple lines. + + input: | + ###### This is a heading + in multiple lines. + + html: | +

This is a heading in multiple lines.

diff --git a/frontend/tests/spec/markup/block/paragraph.yml b/frontend/tests/spec/markup/block/paragraph.yml new file mode 100644 index 00000000..7d7cedad --- /dev/null +++ b/frontend/tests/spec/markup/block/paragraph.yml @@ -0,0 +1,40 @@ +# Unimarkup specification version +spec: "0.0.1" + +name: paragraph-block +description: Test parsing of paragraph. + +tests: + - name: single-line + description: | + Single line paragraph. + + input: | + This is a simple paragraph. + + html: | +

This is a simple paragraph.

+ + - name: multi-line + description: | + Paragraph over multiple lines. + + input: | + This is a paragraph + that spans multiple + lines. + + html: | +

This is a paragraph that spans multiple lines.

+ + - name: two-paragraphs + description: | + Two paragraphs separated by a blank line. + + input: | + This is the first paragraph. + + And this should be the second one. + + html: | +

This is the first paragraph.

And this should be the second one.

diff --git a/frontend/tests/spec/markup/bold.yml b/frontend/tests/spec/markup/bold.yml new file mode 100644 index 00000000..a8b8e90c --- /dev/null +++ b/frontend/tests/spec/markup/bold.yml @@ -0,0 +1,106 @@ +# Unimarkup specification version +spec: "0.0.1" + +name: plain +description: Test lexing of text with bold formatting. + +tests: + - name: simple-bold + description: | + Simple bold text. + + input: | + **Bold** + + html: | + Bold + + - name: bold-not-bold + description: | + Bold combined with plain. + + input: | + **Bold** not bold. + + html: | + Bold not bold. + + - name: not-bold + description: | + Bold that's not correctly opened nor closed. + + input: | + ** not bold ** + + html: | + ** not bold ** + + - name: implicit-closed-bold + description: | + Bold that's implicitly closed after invalid closing sequence. + + input: | + **implicit bold **close + + html: | + implicit bold **close + + - name: not-opened-bold + description: | + Bold that's not correctly opened. + + input: | + ** not bold** + + html: | + ** not bold** + + - name: escaped-bold + description: | + Bold that's escaped. + + input: | + \*\*not bold\*\* + + html: | + **not bold** + + - name: bold-in-middle + description: | + Bold that's found in middle of a text. + + input: | + The next **word** is bold. + + html: | + The next word is bold. + + - name: ambiguous-start + description: | + Ambiguous token with inner bold and implicit closed italic. + + input: | + The next ***word** is bolditalic. + + html: | + The next word is bolditalic. + + - name: ambiguous-end + description: | + Bold that's ended with an ambiguous token. + + input: | + The next **word*** is bold. + + html: | + The next word* is bold. + + - name: ambiguous-close + description: | + BoldItalic that's closed with an ambiguous token. + + input: | + **bold *+italic*** plain + + html: | + bold +italic plain diff --git a/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-1.snap b/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-1.snap new file mode 100644 index 00000000..4a1bae52 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-1.snap @@ -0,0 +1,49 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'multi-line-level-1' from 'markup/block/heading.yml'" +--- +# This is a heading + in two lines. + +# +^ - Hash(1) @ (0 -> 1) + + ^ - Whitespace @ (1 -> 2) + This + ^^^^ - Plain @ (2 -> 6) + + ^ - Whitespace @ (6 -> 7) + is + ^^ - Plain @ (7 -> 9) + + ^ - Whitespace @ (9 -> 10) + a + ^ - Plain @ (10 -> 11) + + ^ - Whitespace @ (11 -> 12) + heading + ^^^^^^^ - Plain @ (12 -> 19) + ␊ + ^ - Newline @ (19 -> 20) + +^^ - Indentation(2) @ (20 -> 22) + in + ^^ - Plain @ (22 -> 24) + + ^ - Whitespace @ (24 -> 25) + two + ^^^ - Plain @ (25 -> 28) + + ^ - Whitespace @ (28 -> 29) + lines + ^^^^^ - Plain @ (29 -> 34) + . + ^ - Dot(1) @ (34 -> 35) + ␊ + ^ - Newline @ (35 -> 36) + +--- +With input: + +# This is a heading + in two lines. diff --git a/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-6.snap b/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-6.snap new file mode 100644 index 00000000..706559ac --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/heading/multi-line-level-6.snap @@ -0,0 +1,49 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'multi-line-level-6' from 'markup/block/heading.yml'" +--- +###### This is a heading + in multiple lines. 
+ +###### +^^^^^^ - Hash(6) @ (0 -> 6) + + ^ - Whitespace @ (6 -> 7) + This + ^^^^ - Plain @ (7 -> 11) + + ^ - Whitespace @ (11 -> 12) + is + ^^ - Plain @ (12 -> 14) + + ^ - Whitespace @ (14 -> 15) + a + ^ - Plain @ (15 -> 16) + + ^ - Whitespace @ (16 -> 17) + heading + ^^^^^^^ - Plain @ (17 -> 24) + ␊ + ^ - Newline @ (24 -> 25) + +^^^^^^^ - Indentation(7) @ (25 -> 32) + in + ^^ - Plain @ (32 -> 34) + + ^ - Whitespace @ (34 -> 35) + multiple + ^^^^^^^^ - Plain @ (35 -> 43) + + ^ - Whitespace @ (43 -> 44) + lines + ^^^^^ - Plain @ (44 -> 49) + . + ^ - Dot(1) @ (49 -> 50) + ␊ + ^ - Newline @ (50 -> 51) + +--- +With input: + +###### This is a heading + in multiple lines. diff --git a/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-1.snap b/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-1.snap new file mode 100644 index 00000000..36cfdc51 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-1.snap @@ -0,0 +1,37 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'single-line-level-1' from 'markup/block/heading.yml'" +--- +# This is a simple heading. + +# +^ - Hash(1) @ (0 -> 1) + + ^ - Whitespace @ (1 -> 2) + This + ^^^^ - Plain @ (2 -> 6) + + ^ - Whitespace @ (6 -> 7) + is + ^^ - Plain @ (7 -> 9) + + ^ - Whitespace @ (9 -> 10) + a + ^ - Plain @ (10 -> 11) + + ^ - Whitespace @ (11 -> 12) + simple + ^^^^^^ - Plain @ (12 -> 18) + + ^ - Whitespace @ (18 -> 19) + heading + ^^^^^^^ - Plain @ (19 -> 26) + . + ^ - Dot(1) @ (26 -> 27) + ␊ + ^ - Newline @ (27 -> 28) + +--- +With input: + +# This is a simple heading. diff --git a/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-6.snap b/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-6.snap new file mode 100644 index 00000000..805050b0 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/heading/single-line-level-6.snap @@ -0,0 +1,33 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'single-line-level-6' from 'markup/block/heading.yml'" +--- +###### This is a heading. + +###### +^^^^^^ - Hash(6) @ (0 -> 6) + + ^ - Whitespace @ (6 -> 7) + This + ^^^^ - Plain @ (7 -> 11) + + ^ - Whitespace @ (11 -> 12) + is + ^^ - Plain @ (12 -> 14) + + ^ - Whitespace @ (14 -> 15) + a + ^ - Plain @ (15 -> 16) + + ^ - Whitespace @ (16 -> 17) + heading + ^^^^^^^ - Plain @ (17 -> 24) + . + ^ - Dot(1) @ (24 -> 25) + ␊ + ^ - Newline @ (25 -> 26) + +--- +With input: + +###### This is a heading. diff --git a/frontend/tests/spec/snapshots/lexer/block/paragraph/multi-line.snap b/frontend/tests/spec/snapshots/lexer/block/paragraph/multi-line.snap new file mode 100644 index 00000000..f0703879 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/paragraph/multi-line.snap @@ -0,0 +1,49 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'multi-line' from 'markup/block/paragraph.yml'" +--- +This is a paragraph +that spans multiple +lines. + +This +^^^^ - Plain @ (0 -> 4) + + ^ - Whitespace @ (4 -> 5) + is + ^^ - Plain @ (5 -> 7) + + ^ - Whitespace @ (7 -> 8) + a + ^ - Plain @ (8 -> 9) + + ^ - Whitespace @ (9 -> 10) + paragraph + ^^^^^^^^^ - Plain @ (10 -> 19) + ␊ + ^ - Newline @ (19 -> 20) +that +^^^^ - Plain @ (20 -> 24) + + ^ - Whitespace @ (24 -> 25) + spans + ^^^^^ - Plain @ (25 -> 30) + + ^ - Whitespace @ (30 -> 31) + multiple + ^^^^^^^^ - Plain @ (31 -> 39) + ␊ + ^ - Newline @ (39 -> 40) +lines +^^^^^ - Plain @ (40 -> 45) + . 
+ ^ - Dot(1) @ (45 -> 46) + ␊ + ^ - Newline @ (46 -> 47) + +--- +With input: + +This is a paragraph +that spans multiple +lines. diff --git a/frontend/tests/spec/snapshots/lexer/block/paragraph/single-line.snap b/frontend/tests/spec/snapshots/lexer/block/paragraph/single-line.snap new file mode 100644 index 00000000..a7408917 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/paragraph/single-line.snap @@ -0,0 +1,33 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'single-line' from 'markup/block/paragraph.yml'" +--- +This is a simple paragraph. + +This +^^^^ - Plain @ (0 -> 4) + + ^ - Whitespace @ (4 -> 5) + is + ^^ - Plain @ (5 -> 7) + + ^ - Whitespace @ (7 -> 8) + a + ^ - Plain @ (8 -> 9) + + ^ - Whitespace @ (9 -> 10) + simple + ^^^^^^ - Plain @ (10 -> 16) + + ^ - Whitespace @ (16 -> 17) + paragraph + ^^^^^^^^^ - Plain @ (17 -> 26) + . + ^ - Dot(1) @ (26 -> 27) + ␊ + ^ - Newline @ (27 -> 28) + +--- +With input: + +This is a simple paragraph. diff --git a/frontend/tests/spec/snapshots/lexer/block/paragraph/two-paragraphs.snap b/frontend/tests/spec/snapshots/lexer/block/paragraph/two-paragraphs.snap new file mode 100644 index 00000000..8eb8e138 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/block/paragraph/two-paragraphs.snap @@ -0,0 +1,69 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'two-paragraphs' from 'markup/block/paragraph.yml'" +--- +This is the first paragraph. + +And this should be the second one. + +This +^^^^ - Plain @ (0 -> 4) + + ^ - Whitespace @ (4 -> 5) + is + ^^ - Plain @ (5 -> 7) + + ^ - Whitespace @ (7 -> 8) + the + ^^^ - Plain @ (8 -> 11) + + ^ - Whitespace @ (11 -> 12) + first + ^^^^^ - Plain @ (12 -> 17) + + ^ - Whitespace @ (17 -> 18) + paragraph + ^^^^^^^^^ - Plain @ (18 -> 27) + . + ^ - Dot(1) @ (27 -> 28) + ␊ + ^ - Newline @ (28 -> 29) +␊ +^ - Blankline @ (29 -> 30) +And +^^^ - Plain @ (30 -> 33) + + ^ - Whitespace @ (33 -> 34) + this + ^^^^ - Plain @ (34 -> 38) + + ^ - Whitespace @ (38 -> 39) + should + ^^^^^^ - Plain @ (39 -> 45) + + ^ - Whitespace @ (45 -> 46) + be + ^^ - Plain @ (46 -> 48) + + ^ - Whitespace @ (48 -> 49) + the + ^^^ - Plain @ (49 -> 52) + + ^ - Whitespace @ (52 -> 53) + second + ^^^^^^ - Plain @ (53 -> 59) + + ^ - Whitespace @ (59 -> 60) + one + ^^^ - Plain @ (60 -> 63) + . + ^ - Dot(1) @ (63 -> 64) + ␊ + ^ - Newline @ (64 -> 65) + +--- +With input: + +This is the first paragraph. + +And this should be the second one. 
diff --git a/frontend/tests/spec/snapshots/lexer/bold/ambiguous-close.snap b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-close.snap new file mode 100644 index 00000000..6d5c1659 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-close.snap @@ -0,0 +1,31 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'ambiguous-close' from 'markup/bold.yml'" +--- +**bold *+italic*** plain + +** +^^ - Star(2) @ (0 -> 2) + bold + ^^^^ - Plain @ (2 -> 6) + + ^ - Whitespace @ (6 -> 7) + * + ^ - Star(1) @ (7 -> 8) + + + ^ - Plus(1) @ (8 -> 9) + italic + ^^^^^^ - Plain @ (9 -> 15) + *** + ^^^ - Star(3) @ (15 -> 18) + + ^ - Whitespace @ (18 -> 19) + plain + ^^^^^ - Plain @ (19 -> 24) + ␊ + ^ - Newline @ (24 -> 25) + +--- +With input: + +**bold *+italic*** plain diff --git a/frontend/tests/spec/snapshots/lexer/bold/ambiguous-end.snap b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-end.snap new file mode 100644 index 00000000..63b1f52d --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-end.snap @@ -0,0 +1,37 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'ambiguous-end' from 'markup/bold.yml'" +--- +The next **word*** is bold. + +The +^^^ - Plain @ (0 -> 3) + + ^ - Whitespace @ (3 -> 4) + next + ^^^^ - Plain @ (4 -> 8) + + ^ - Whitespace @ (8 -> 9) + ** + ^^ - Star(2) @ (9 -> 11) + word + ^^^^ - Plain @ (11 -> 15) + *** + ^^^ - Star(3) @ (15 -> 18) + + ^ - Whitespace @ (18 -> 19) + is + ^^ - Plain @ (19 -> 21) + + ^ - Whitespace @ (21 -> 22) + bold + ^^^^ - Plain @ (22 -> 26) + . + ^ - Dot(1) @ (26 -> 27) + ␊ + ^ - Newline @ (27 -> 28) + +--- +With input: + +The next **word*** is bold. diff --git a/frontend/tests/spec/snapshots/lexer/bold/ambiguous-start.snap b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-start.snap new file mode 100644 index 00000000..2cd7af69 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/ambiguous-start.snap @@ -0,0 +1,37 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'ambiguous-start' from 'markup/bold.yml'" +--- +The next ***word** is bolditalic. + +The +^^^ - Plain @ (0 -> 3) + + ^ - Whitespace @ (3 -> 4) + next + ^^^^ - Plain @ (4 -> 8) + + ^ - Whitespace @ (8 -> 9) + *** + ^^^ - Star(3) @ (9 -> 12) + word + ^^^^ - Plain @ (12 -> 16) + ** + ^^ - Star(2) @ (16 -> 18) + + ^ - Whitespace @ (18 -> 19) + is + ^^ - Plain @ (19 -> 21) + + ^ - Whitespace @ (21 -> 22) + bolditalic + ^^^^^^^^^^ - Plain @ (22 -> 32) + . + ^ - Dot(1) @ (32 -> 33) + ␊ + ^ - Newline @ (33 -> 34) + +--- +With input: + +The next ***word** is bolditalic. diff --git a/frontend/tests/spec/snapshots/lexer/bold/bold-in-middle.snap b/frontend/tests/spec/snapshots/lexer/bold/bold-in-middle.snap new file mode 100644 index 00000000..a1fd7db9 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/bold-in-middle.snap @@ -0,0 +1,37 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'bold-in-middle' from 'markup/bold.yml'" +--- +The next **word** is bold. + +The +^^^ - Plain @ (0 -> 3) + + ^ - Whitespace @ (3 -> 4) + next + ^^^^ - Plain @ (4 -> 8) + + ^ - Whitespace @ (8 -> 9) + ** + ^^ - Star(2) @ (9 -> 11) + word + ^^^^ - Plain @ (11 -> 15) + ** + ^^ - Star(2) @ (15 -> 17) + + ^ - Whitespace @ (17 -> 18) + is + ^^ - Plain @ (18 -> 20) + + ^ - Whitespace @ (20 -> 21) + bold + ^^^^ - Plain @ (21 -> 25) + . + ^ - Dot(1) @ (25 -> 26) + ␊ + ^ - Newline @ (26 -> 27) + +--- +With input: + +The next **word** is bold. 
diff --git a/frontend/tests/spec/snapshots/lexer/bold/bold-not-bold.snap b/frontend/tests/spec/snapshots/lexer/bold/bold-not-bold.snap new file mode 100644 index 00000000..beff2e2e --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/bold-not-bold.snap @@ -0,0 +1,29 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'bold-not-bold' from 'markup/bold.yml'" +--- +**Bold** not bold. + +** +^^ - Star(2) @ (0 -> 2) + Bold + ^^^^ - Plain @ (2 -> 6) + ** + ^^ - Star(2) @ (6 -> 8) + + ^ - Whitespace @ (8 -> 9) + not + ^^^ - Plain @ (9 -> 12) + + ^ - Whitespace @ (12 -> 13) + bold + ^^^^ - Plain @ (13 -> 17) + . + ^ - Dot(1) @ (17 -> 18) + ␊ + ^ - Newline @ (18 -> 19) + +--- +With input: + +**Bold** not bold. diff --git a/frontend/tests/spec/snapshots/lexer/bold/escaped-bold.snap b/frontend/tests/spec/snapshots/lexer/bold/escaped-bold.snap new file mode 100644 index 00000000..accaa4bb --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/escaped-bold.snap @@ -0,0 +1,27 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'escaped-bold' from 'markup/bold.yml'" +--- +\*\*not bold\*\* + +\* +^^ - Plain @ (0 -> 2) + \* + ^^ - Plain @ (2 -> 4) + not + ^^^ - Plain @ (4 -> 7) + + ^ - Whitespace @ (7 -> 8) + bold + ^^^^ - Plain @ (8 -> 12) + \* + ^^ - Plain @ (12 -> 14) + \* + ^^ - Plain @ (14 -> 16) + ␊ + ^ - Newline @ (16 -> 17) + +--- +With input: + +\*\*not bold\*\* diff --git a/frontend/tests/spec/snapshots/lexer/bold/implicit-closed-bold.snap b/frontend/tests/spec/snapshots/lexer/bold/implicit-closed-bold.snap new file mode 100644 index 00000000..cd3fde1e --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/implicit-closed-bold.snap @@ -0,0 +1,27 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'implicit-closed-bold' from 'markup/bold.yml'" +--- +**implicit bold **close + +** +^^ - Star(2) @ (0 -> 2) + implicit + ^^^^^^^^ - Plain @ (2 -> 10) + + ^ - Whitespace @ (10 -> 11) + bold + ^^^^ - Plain @ (11 -> 15) + + ^ - Whitespace @ (15 -> 16) + ** + ^^ - Star(2) @ (16 -> 18) + close + ^^^^^ - Plain @ (18 -> 23) + ␊ + ^ - Newline @ (23 -> 24) + +--- +With input: + +**implicit bold **close diff --git a/frontend/tests/spec/snapshots/lexer/bold/not-bold.snap b/frontend/tests/spec/snapshots/lexer/bold/not-bold.snap new file mode 100644 index 00000000..d856065e --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/not-bold.snap @@ -0,0 +1,27 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'not-bold' from 'markup/bold.yml'" +--- +** not bold ** + +** +^^ - Star(2) @ (0 -> 2) + + ^ - Whitespace @ (2 -> 3) + not + ^^^ - Plain @ (3 -> 6) + + ^ - Whitespace @ (6 -> 7) + bold + ^^^^ - Plain @ (7 -> 11) + + ^ - Whitespace @ (11 -> 12) + ** + ^^ - Star(2) @ (12 -> 14) + ␊ + ^ - Newline @ (14 -> 15) + +--- +With input: + +** not bold ** diff --git a/frontend/tests/spec/snapshots/lexer/bold/not-opened-bold.snap b/frontend/tests/spec/snapshots/lexer/bold/not-opened-bold.snap new file mode 100644 index 00000000..99052678 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/not-opened-bold.snap @@ -0,0 +1,25 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'not-opened-bold' from 'markup/bold.yml'" +--- +** not bold** + +** +^^ - Star(2) @ (0 -> 2) + + ^ - Whitespace @ (2 -> 3) + not + ^^^ - Plain @ (3 -> 6) + + ^ - Whitespace @ (6 -> 7) + bold + ^^^^ - Plain @ (7 -> 11) + ** + ^^ - Star(2) @ (11 -> 13) + ␊ + ^ - Newline @ (13 -> 14) + +--- +With input: + +** not bold** diff --git a/frontend/tests/spec/snapshots/lexer/bold/simple-bold.snap 
b/frontend/tests/spec/snapshots/lexer/bold/simple-bold.snap new file mode 100644 index 00000000..63c5fd54 --- /dev/null +++ b/frontend/tests/spec/snapshots/lexer/bold/simple-bold.snap @@ -0,0 +1,19 @@ +--- +source: frontend/tests/lexer/mod.rs +info: "Test 'simple-bold' from 'markup/bold.yml'" +--- +**Bold** + +** +^^ - Star(2) @ (0 -> 2) + Bold + ^^^^ - Plain @ (2 -> 6) + ** + ^^ - Star(2) @ (6 -> 8) + ␊ + ^ - Newline @ (8 -> 9) + +--- +With input: + +**Bold** diff --git a/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-1.snap b/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-1.snap new file mode 100644 index 00000000..be438cd8 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-1.snap @@ -0,0 +1,15 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'multi-line-level-1' from 'markup/block/heading.yml'" +--- +Heading(level-1) @ (1 -> 36) { + This is a heading + in two lines. +} + + +--- +With input: + +# This is a heading + in two lines. diff --git a/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-6.snap b/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-6.snap new file mode 100644 index 00000000..58a5891a --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/heading/multi-line-level-6.snap @@ -0,0 +1,15 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'multi-line-level-6' from 'markup/block/heading.yml'" +--- +Heading(level-6) @ (6 -> 51) { + This is a heading + in multiple lines. +} + + +--- +With input: + +###### This is a heading + in multiple lines. diff --git a/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-1.snap b/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-1.snap new file mode 100644 index 00000000..dc720a9c --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-1.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'single-line-level-1' from 'markup/block/heading.yml'" +--- +Heading(level-1) @ (1 -> 28) { + This is a simple heading. +} + + +--- +With input: + +# This is a simple heading. diff --git a/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-6.snap b/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-6.snap new file mode 100644 index 00000000..a34b453e --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/heading/single-line-level-6.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'single-line-level-6' from 'markup/block/heading.yml'" +--- +Heading(level-6) @ (6 -> 26) { + This is a heading. +} + + +--- +With input: + +###### This is a heading. diff --git a/frontend/tests/spec/snapshots/parser/block/paragraph/multi-line.snap b/frontend/tests/spec/snapshots/parser/block/paragraph/multi-line.snap new file mode 100644 index 00000000..a0899fbd --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/paragraph/multi-line.snap @@ -0,0 +1,17 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'multi-line' from 'markup/block/paragraph.yml'" +--- +Paragraph @ (0 -> 47) { + This is a paragraph + that spans multiple + lines. +} + + +--- +With input: + +This is a paragraph +that spans multiple +lines. 
diff --git a/frontend/tests/spec/snapshots/parser/block/paragraph/single-line.snap b/frontend/tests/spec/snapshots/parser/block/paragraph/single-line.snap new file mode 100644 index 00000000..b3f2e08b --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/paragraph/single-line.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'single-line' from 'markup/block/paragraph.yml'" +--- +Paragraph @ (0 -> 28) { + This is a simple paragraph. +} + + +--- +With input: + +This is a simple paragraph. diff --git a/frontend/tests/spec/snapshots/parser/block/paragraph/two-paragraphs.snap b/frontend/tests/spec/snapshots/parser/block/paragraph/two-paragraphs.snap new file mode 100644 index 00000000..0785118e --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/block/paragraph/two-paragraphs.snap @@ -0,0 +1,19 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'two-paragraphs' from 'markup/block/paragraph.yml'" +--- +Paragraph @ (0 -> 29) { + This is the first paragraph. +} + +Paragraph @ (30 -> 65) { + And this should be the second one. +} + + +--- +With input: + +This is the first paragraph. + +And this should be the second one. diff --git a/frontend/tests/spec/snapshots/parser/bold/ambiguous-close.snap b/frontend/tests/spec/snapshots/parser/bold/ambiguous-close.snap new file mode 100644 index 00000000..1a28d630 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/ambiguous-close.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'ambiguous-close' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 25) { + **bold *+italic*** plain +} + + +--- +With input: + +**bold *+italic*** plain diff --git a/frontend/tests/spec/snapshots/parser/bold/ambiguous-end.snap b/frontend/tests/spec/snapshots/parser/bold/ambiguous-end.snap new file mode 100644 index 00000000..f8abeb25 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/ambiguous-end.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'ambiguous-end' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 28) { + The next **word*** is bold. +} + + +--- +With input: + +The next **word*** is bold. diff --git a/frontend/tests/spec/snapshots/parser/bold/ambiguous-start.snap b/frontend/tests/spec/snapshots/parser/bold/ambiguous-start.snap new file mode 100644 index 00000000..ffc77635 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/ambiguous-start.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'ambiguous-start' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 34) { + The next ***word** is bolditalic. +} + + +--- +With input: + +The next ***word** is bolditalic. diff --git a/frontend/tests/spec/snapshots/parser/bold/bold-in-middle.snap b/frontend/tests/spec/snapshots/parser/bold/bold-in-middle.snap new file mode 100644 index 00000000..9576707b --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/bold-in-middle.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'bold-in-middle' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 27) { + The next **word** is bold. +} + + +--- +With input: + +The next **word** is bold. 
diff --git a/frontend/tests/spec/snapshots/parser/bold/bold-not-bold.snap b/frontend/tests/spec/snapshots/parser/bold/bold-not-bold.snap new file mode 100644 index 00000000..e62c9f4f --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/bold-not-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'bold-not-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 19) { + **Bold** not bold. +} + + +--- +With input: + +**Bold** not bold. diff --git a/frontend/tests/spec/snapshots/parser/bold/escaped-bold.snap b/frontend/tests/spec/snapshots/parser/bold/escaped-bold.snap new file mode 100644 index 00000000..95ea6b97 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/escaped-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'escaped-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 17) { + \*\*not bold\*\* +} + + +--- +With input: + +\*\*not bold\*\* diff --git a/frontend/tests/spec/snapshots/parser/bold/implicit-closed-bold.snap b/frontend/tests/spec/snapshots/parser/bold/implicit-closed-bold.snap new file mode 100644 index 00000000..4d77787a --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/implicit-closed-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'implicit-closed-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 24) { + **implicit bold **close +} + + +--- +With input: + +**implicit bold **close diff --git a/frontend/tests/spec/snapshots/parser/bold/not-bold.snap b/frontend/tests/spec/snapshots/parser/bold/not-bold.snap new file mode 100644 index 00000000..074cc742 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/not-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'not-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 15) { + ** not bold ** +} + + +--- +With input: + +** not bold ** diff --git a/frontend/tests/spec/snapshots/parser/bold/not-opened-bold.snap b/frontend/tests/spec/snapshots/parser/bold/not-opened-bold.snap new file mode 100644 index 00000000..f3409e6d --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/not-opened-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'not-opened-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 14) { + ** not bold** +} + + +--- +With input: + +** not bold** diff --git a/frontend/tests/spec/snapshots/parser/bold/simple-bold.snap b/frontend/tests/spec/snapshots/parser/bold/simple-bold.snap new file mode 100644 index 00000000..927df145 --- /dev/null +++ b/frontend/tests/spec/snapshots/parser/bold/simple-bold.snap @@ -0,0 +1,13 @@ +--- +source: frontend/tests/parser/mod.rs +info: "Test 'simple-bold' from 'markup/bold.yml'" +--- +Paragraph @ (0 -> 9) { + **Bold** +} + + +--- +With input: + +**Bold** diff --git a/inline/tests/parser/mod.rs b/inline/tests/parser/mod.rs index 6001404f..7698e6f0 100644 --- a/inline/tests/parser/mod.rs +++ b/inline/tests/parser/mod.rs @@ -65,11 +65,12 @@ fn run_spec_test(case: test_runner::test_file::TestCase) { } fn run_snap_test(case: test_runner::test_file::TestCase) { - let tokens = unimarkup_commons::lexer::token::lex_str(&case.test.input); + let runner = SnapTestRunner::with_fn(&case.test.name, &case.test.input, |input| { + let slice: &[_] = &unimarkup_commons::lexer::token::lex_str(input); + let token_iterator = slice.into(); - let runner = SnapTestRunner::with_fn(&case.test.name, &tokens, |slice| { let (_, _, parsed_inlines) = unimarkup_inline::parser::parse_inlines( - slice.into(), 
+ token_iterator, InlineContext::default(), None, None, diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 11fd5bea..89e6ec30 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -7,6 +7,122 @@ pub mod elements; pub mod log_id; pub mod metadata; mod parser; +// mod parser2; pub mod security; pub use parser::*; + +/* + +~~~ +test +~~~ +## heading // indentation 3 Token::Indentation(usize) + laskfjlks + lkajsdflk +> hello there + +. list entry // let indent = Indentation(2) + > quote // Indentation(4) + multi-line // Indentation(4) + laksdjf // indent is still available + + +fn parse_list(&mut self) -> List { + let parent_indent = self.parent_indent(); + + + let indent = ctx.indent(); + ctx.push_indent(); + + loop { + let list_entry = list_entry(ctx); + + // indent is still available here... + } + + ctx.pop_indent(); +} + +fn parse_list_entry(ctx: &mut Context) -> ListEntry { + let indent = ctx.indent(); + + loop { + let quote = parse_quote(ctx); + + // indent is still available here + } + + return ListEntry { ... }; +} + +[[[
+[[[ +inner +]]] +]]] + +[[[ +# Heading lvl 1 + +- list entry 1 + +- entry 2 + . nested numbered list entry 1 + # body for nested numbered + + [[[ + ]]] bla bla + + + body for bullet entry 2, but not for numbered + + Cow<'a, str> + +===|#| |_| +| r1 c1 | r1 c2 | r1 c3 | ++ merge | merge | merge | +| r3 c1 | r3 c2 | r3 c3 | +| r4 c1 |+ r4 c1 | r4 c3 | +! not merged | merged |! not merged | +# head r5 | bla | bla | +_ lksad | jsl | jkd | +==={ + id: my_table; +} + +===|#| |_| +| r1 c1 | r1 c2 | r1 c3 | +|+ merge | merge | merge | +| r3 c1 | r3 c2 | r3 c3 | +| r4 c1 |+ r4 c1 | r4 c3 | +|! not merged | merged |! not merged | +|# head r5 | bla | bla | +|_ lksad | jsl | jkd | +==={ + id: my_table; +} +]]] + +```rs +let bal; +``` + +. nested numbered list entry 1 + # body for nested numbered + +=> ol li "nested numbered list entry 1 # body for nested numbered" + +. nested numbered list entry 1 + + body for nested numbered + +=> ol li entry-head("nested numbered list entry 1") entry-body(p(body for nested numbered)) + + +- list 1 +- list 1 + + + + */ diff --git a/render/Cargo.toml b/render/Cargo.toml index 4e20685b..568a9ae1 100644 --- a/render/Cargo.toml +++ b/render/Cargo.toml @@ -21,9 +21,9 @@ serde_yaml.workspace = true unimarkup-commons = { path = "../commons/", version = "0" } unimarkup-inline = { path = "../inline/", version = "0" } unimarkup-parser = { path = "../parser/", version = "0" } -syntect = "5.0" -rustyscript = "0.1.1" -spreadsheet-ods = "0.17.0" +syntect = "5.2.0" +rustyscript = "0.8.3" +spreadsheet-ods = "0.22.5" headless_chrome = "1.0.9" tempfile = "3.8.0" mathemascii = "0.4.0" diff --git a/render/src/html/citeproc/mod.rs b/render/src/html/citeproc/mod.rs index 52061e82..31fb4c71 100644 --- a/render/src/html/citeproc/mod.rs +++ b/render/src/html/citeproc/mod.rs @@ -73,7 +73,7 @@ impl CiteprocWrapper { match self.init_processor(doc, for_pagedjs) { Ok(_) => self .module - .call("getCitationStrings", citation_id_vectors) + .call("getCitationStrings", &citation_id_vectors) .map_err(|_| CiteError::CitationError), Err(e) => Err(e), }