diff --git a/src/parsers/bnf.rs b/src/parsers/bnf.rs index 745db88..ae3c9cf 100644 --- a/src/parsers/bnf.rs +++ b/src/parsers/bnf.rs @@ -79,4 +79,62 @@ mod tests { let (_, actual) = grammar::(input).unwrap(); assert_eq!(expected, actual); } + + #[test] + fn production_with_comment_suffix() { + let input = " ::= 'x' ; only a comment\n"; + let expected = Production::from_parts( + Term::Nonterminal("a".to_string()), + vec![Expression::from_parts(vec![Term::Terminal( + "x".to_string(), + )])], + ); + + let (_, actual) = production::(input).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn grammar_with_comment_only_line() { + let input = " ::= 'x'\n; comment\n ::= 'y'\n"; + let expected = Grammar::from_parts(vec![ + Production::from_parts( + Term::Nonterminal("a".to_string()), + vec![Expression::from_parts(vec![Term::Terminal( + "x".to_string(), + )])], + ), + Production::from_parts( + Term::Nonterminal("b".to_string()), + vec![Expression::from_parts(vec![Term::Terminal( + "y".to_string(), + )])], + ), + ]); + + let (_, actual) = grammar::(input).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn grammar_with_comment_to_eof() { + let input = " ::= 'x'\n ::= 'y' ; last line comment"; + let expected = Grammar::from_parts(vec![ + Production::from_parts( + Term::Nonterminal("a".to_string()), + vec![Expression::from_parts(vec![Term::Terminal( + "x".to_string(), + )])], + ), + Production::from_parts( + Term::Nonterminal("b".to_string()), + vec![Expression::from_parts(vec![Term::Terminal( + "y".to_string(), + )])], + ), + ]); + + let (_, actual) = grammar::(input).unwrap(); + assert_eq!(expected, actual); + } } diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 46be27b..11083d1 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -496,6 +496,27 @@ fn normalize_parsed_grammar(parsed: ParsedGrammar) -> Grammar { pub mod tests { use super::*; + #[test] + fn whitespace_plus_comments_skips_comment_then_rest() { + let input = " ; comment\n rest"; + let (remaining, _) = whitespace_plus_comments(input).unwrap(); + assert_eq!(remaining, "rest"); + } + + #[test] + fn whitespace_plus_comments_comment_to_eof() { + let input = " ; comment"; + let (remaining, _) = whitespace_plus_comments(input).unwrap(); + assert_eq!(remaining, ""); + } + + #[test] + fn whitespace_plus_comments_skips_only_whitespace_without_semicolon() { + let input = " x"; + let (remaining, _) = whitespace_plus_comments(input).unwrap(); + assert!(remaining.starts_with('x')); + } + #[test] fn terminal_match() { let input = "\"hello world\""; diff --git a/src/production.rs b/src/production.rs index 75db118..3f8b832 100644 --- a/src/production.rs +++ b/src/production.rs @@ -297,6 +297,24 @@ mod tests { assert_eq!(prod, crate::production!( ::= 'A')); } + #[test] + fn parse_comment_with_text_then_newline() { + let prod = Production::from_str(" ::= 'x' ; this is a comment\n").unwrap(); + assert_eq!(prod, crate::production!( ::= 'x')); + } + + #[test] + fn parse_comment_to_eof() { + let prod = Production::from_str(" ::= 'x' ; comment").unwrap(); + assert_eq!(prod, crate::production!( ::= 'x')); + } + + #[test] + fn parse_comment_between_alternatives() { + let prod = Production::from_str(" ::= 'x' ; comment\n | 'y'").unwrap(); + assert_eq!(prod, crate::production!( ::= 'x' | 'y')); + } + #[test] fn parse_incomplete() { let result = Production::from_str(""); diff --git a/tests/from_str.rs b/tests/from_str.rs index b8dfd10..932316a 100644 --- a/tests/from_str.rs +++ b/tests/from_str.rs @@ -107,3 +107,71 @@ mod custom_trait { assert!(nonterminal.is_ok()) } } + +mod comments { + use bnf::{Grammar, Term}; + + #[test] + fn grammar_with_comments_throughout() { + let input = " ::= 'x' ; end of first rule +; comment-only line + ::= 'y' ; end of second rule"; + let grammar: Grammar = input.parse().expect("parse"); + assert_eq!( + grammar.productions_iter().count(), + 2, + "parsed grammar must have two productions" + ); + let mut prods = grammar.productions_iter(); + let first = prods.next().unwrap(); + assert_eq!(first.lhs, Term::Nonterminal("a".into())); + assert_eq!(first.rhs_iter().next().unwrap().to_string(), "'x'"); + let second = prods.next().unwrap(); + assert_eq!(second.lhs, Term::Nonterminal("b".into())); + assert_eq!(second.rhs_iter().next().unwrap().to_string(), "'y'"); + } + + #[test] + fn comment_does_not_break_parsing() { + let input = " ::= 'x' ; note\n ::= 'y'"; + let grammar: Grammar = input.parse().expect("parse"); + assert_eq!( + grammar.productions_iter().count(), + 2, + "parsed grammar must have two productions" + ); + let mut prods = grammar.productions_iter(); + let first = prods.next().unwrap(); + assert_eq!(first.lhs, Term::Nonterminal("a".into())); + assert_eq!(first.rhs_iter().next().unwrap().to_string(), "'x'"); + let second = prods.next().unwrap(); + assert_eq!(second.lhs, Term::Nonterminal("b".into())); + assert_eq!(second.rhs_iter().next().unwrap().to_string(), "'y'"); + } + + /// Full annotated DNA grammar: leading comment, inline comment, trailing comment. + /// Comments are stripped; the grammar parses to the same structure as the uncommented version. + #[test] + fn annotated_dna_grammar_with_comments() { + let grammar_str = "; the building blocks of life! + ::= | + ::= 'A' | 'C' | 'G' | 'T' ;(Adenine, Cytosine, Guanine, and Thymine) +; the end 📖"; + let grammar: Grammar = grammar_str.parse().expect("parse annotated DNA grammar"); + + assert_eq!( + grammar.productions_iter().count(), + 2, + "annotated grammar must have two productions (dna, base)" + ); + let mut prods = grammar.productions_iter(); + let dna = prods.next().unwrap(); + assert_eq!(dna.lhs, Term::Nonterminal("dna".into())); + let dna_rhs: Vec<_> = dna.rhs_iter().map(|e| e.to_string()).collect(); + assert_eq!(dna_rhs, ["", " "]); + let base = prods.next().unwrap(); + assert_eq!(base.lhs, Term::Nonterminal("base".into())); + let base_rhs: Vec<_> = base.rhs_iter().map(|e| e.to_string()).collect(); + assert_eq!(base_rhs, ["'A'", "'C'", "'G'", "'T'"]); + } +} diff --git a/tests/parse_input.rs b/tests/parse_input.rs index 940e83e..4ded229 100644 --- a/tests/parse_input.rs +++ b/tests/parse_input.rs @@ -67,6 +67,22 @@ fn dna_right_recursive() { assert_snapshot!(parses.join("\n")); } +#[test] +fn dna_annotated_grammar_parses_input() { + // Same DNA grammar as dna_right_recursive but with BNF comments throughout. + // Comments are stripped; parsing "GATTACA" yields the same parse trees. + let grammar: Grammar = "; the building blocks of life! + ::= | + ::= 'A' | 'C' | 'G' | 'T' ;(Adenine, Cytosine, Guanine, and Thymine) +; the end 📖" + .parse() + .unwrap(); + + let input = "GATTACA"; + let parses: Vec<_> = grammar.parse_input(input).map(|a| a.to_string()).collect(); + assert_snapshot!(parses.join("\n")); +} + #[test] fn ambiguous() { let grammar: Grammar = " ::= | diff --git a/tests/snapshots/parse_input__dna_annotated_grammar_parses_input.snap b/tests/snapshots/parse_input__dna_annotated_grammar_parses_input.snap new file mode 100644 index 0000000..be0f1de --- /dev/null +++ b/tests/snapshots/parse_input__dna_annotated_grammar_parses_input.snap @@ -0,0 +1,25 @@ +--- +source: tests/parse_input.rs +expression: "parses.join(\"\\n\")" +--- + ::= +├── ::= "G" +│ └── "G" +└── ::= + ├── ::= "A" + │ └── "A" + └── ::= + ├── ::= "T" + │ └── "T" + └── ::= + ├── ::= "T" + │ └── "T" + └── ::= + ├── ::= "A" + │ └── "A" + └── ::= + ├── ::= "C" + │ └── "C" + └── ::= + └── ::= "A" + └── "A"