Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 48 additions & 45 deletions src/earley/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,47 @@ mod input_range;
mod traversal;

use crate::parser::grammar::ParseGrammar;
use crate::{ParseTree, ParseTreeNode, Term, tracing};
use crate::{GrammarParser, ParseTree, ParseTreeNode, Term, tracing};
use input_range::InputRange;
use std::collections::{BTreeSet, HashSet, VecDeque};
use std::rc::Rc;
use traversal::{TermMatch, Traversal, TraversalId, TraversalTree};

pub fn parse<'gram>(
grammar: &'gram crate::Grammar,
grammar: &'gram GrammarParser<'gram>,
input: &'gram str,
starting_term: Option<&'gram Term>,
) -> impl Iterator<Item = ParseTree<'gram>> {
ParseTreeIter::new(grammar, input)
ParseTreeIter::new(ParserHold::Borrowed(grammar), input, starting_term)
}

pub fn parse_starting_with<'gram>(
grammar: &'gram crate::Grammar,
/// Parse using an owned parser (e.g. from deprecated `Grammar::parse_input`).
/// The iterator holds `Rc<GrammarParser>` to keep the parser alive.
pub fn parse_with_parser_rc<'gram>(
parser: Rc<GrammarParser<'gram>>,
input: &'gram str,
starting_term: &'gram Term,
starting_term: Option<&'gram Term>,
) -> impl Iterator<Item = ParseTree<'gram>> {
ParseTreeIter::new_starting_with(grammar, input, starting_term)
ParseTreeIter::new(ParserHold::Owned(parser), input, starting_term)
}

/// Parse input using a pre-built `ParseGrammar`, starting with the given term.
/// This allows reusing the `ParseGrammar` for multiple inputs.
pub(crate) fn parse_starting_with_grammar<'gram>(
parse_grammar: &Rc<ParseGrammar<'gram>>,
input: &'gram str,
starting_term: &'gram Term,
) -> impl Iterator<Item = ParseTree<'gram>> {
// Clone the Rc (just increments reference count, no data copying)
ParseTreeIter::new_starting_with_grammar(Rc::clone(parse_grammar), input, starting_term)
/// Holds either a borrowed or owned parser so the iterator can keep it alive when needed.
///
/// Only required for the deprecated `Grammar::parse_input` and `Grammar::parse_input_starting_with` methods.
/// Prefer `GrammarParser::parse_input` and `GrammarParser::parse_input_starting_with` instead.
///
/// Both variants expose the same `GrammarParser` through `ParserHold::as_ref`,
/// so code holding a `ParserHold` does not need to know which one it has.
#[derive(Debug)]
enum ParserHold<'gram> {
/// A parser owned by the caller and borrowed for `'gram`; this is what `parse` passes in.
Borrowed(&'gram GrammarParser<'gram>),
/// A reference-counted parser stored alongside the iterator, so the parser
/// lives at least as long as the iterator; this is what `parse_with_parser_rc` passes in.
Owned(Rc<GrammarParser<'gram>>),
}

impl<'gram> ParserHold<'gram> {
fn as_ref(&self) -> &GrammarParser<'gram> {
match self {
ParserHold::Borrowed(p) => p,
ParserHold::Owned(rc) => rc.as_ref(),
}
}
}

/// A queue of [`TraversalId`] for processing, with repetitions ignored.
Expand Down Expand Up @@ -169,45 +180,35 @@ fn earley<'gram>(

#[derive(Debug)]
struct ParseTreeIter<'gram> {
parser: ParserHold<'gram>,
traversal_tree: TraversalTree<'gram>,
grammar: Rc<ParseGrammar<'gram>>,
queue: TraversalQueue,
completions: CompletionMap<'gram>,
}

impl<'gram> ParseTreeIter<'gram> {
pub fn new(grammar: &'gram crate::Grammar, input: &'gram str) -> Self {
let starting_term = grammar
.starting_term()
.expect("Grammar must have one production to parse");

Self::new_starting_with(grammar, input, starting_term)
}

pub fn new_starting_with(
grammar: &'gram crate::Grammar,
pub fn new(
parser: ParserHold<'gram>,
input: &'gram str,
starting_term: &'gram Term,
starting_term: Option<&'gram Term>,
) -> Self {
let parse_grammar = Rc::new(ParseGrammar::new(grammar));
Self::new_starting_with_grammar(parse_grammar, input, starting_term)
}

pub(crate) fn new_starting_with_grammar(
parse_grammar: Rc<ParseGrammar<'gram>>,
input: &'gram str,
starting_term: &'gram Term,
) -> Self {
let input = InputRange::new(input);
let input_range = InputRange::new(input);
let mut traversal_tree = TraversalTree::default();
let mut queue = TraversalQueue::default();
let completions = CompletionMap::default();

queue.push_back_starting(&mut traversal_tree, &parse_grammar, starting_term, &input);
let parser_ref = parser.as_ref();
let starting_term = starting_term.unwrap_or(parser_ref.starting_term);

queue.push_back_starting(
&mut traversal_tree,
parser_ref.parse_grammar.as_ref(),
starting_term,
&input_range,
);

Self {
traversal_tree,
grammar: parse_grammar,
parser,
queue,
completions,
}
Expand All @@ -220,13 +221,14 @@ impl<'gram> Iterator for ParseTreeIter<'gram> {
let Self {
queue,
completions,
grammar,
parser,
traversal_tree,
} = self;
let parse_grammar = &parser.as_ref().parse_grammar;

earley(queue, traversal_tree, completions, grammar).map(|traversal_id| {
earley(queue, traversal_tree, completions, parse_grammar).map(|traversal_id| {
let _span = tracing::span!(tracing::Level::DEBUG, "next_parse_tree").entered();
let parse_tree = parse_tree(traversal_tree, grammar, traversal_id);
let parse_tree = parse_tree(traversal_tree, parse_grammar, traversal_id);
tracing::event!(tracing::Level::TRACE, "\n{parse_tree}");
parse_tree
})
Expand Down Expand Up @@ -338,8 +340,9 @@ mod tests {

fn prop_empty_rules_allow_parse(grammar: NestedEmptyGrammar) -> TestResult {
let input = "a";
let parser = GrammarParser::new(&grammar.0).unwrap();

let mut parses = parse(&grammar.0, input);
let mut parses = parse(&parser, input, None);
TestResult::from_bool(parses.next().is_some())
}

Expand Down
2 changes: 1 addition & 1 deletion src/earley/traversal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ mod tests {
grammar: &'a Grammar,
input: &'static str,
) -> (ParseGrammar<'a>, InputRange<'static>, TraversalTree<'a>) {
let matching = ParseGrammar::new(grammar);
let matching = ParseGrammar::new(grammar).unwrap();
let input = InputRange::new(input);
let tree = TraversalTree::default();

Expand Down
96 changes: 87 additions & 9 deletions src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use rand::{Rng, SeedableRng, rng, rngs::StdRng, seq::IndexedRandom};
use serde::{Deserialize, Serialize};

use std::fmt::{self, Write};
use std::rc::Rc;
use std::str;

/// A node of a `ParseTree`, either terminating or continuing the `ParseTree`
Expand All @@ -69,7 +70,7 @@ impl<'gram> ParseTree<'gram> {
}

// A set of column indices, used for tracking which columns are active when formatting a `ParseTree`
type ParseTreeFormatSet = std::collections::HashSet<usize>;
type ParseTreeFormatSet = crate::HashSet<usize>;

impl<'gram> ParseTree<'gram> {
fn fmt(
Expand Down Expand Up @@ -344,11 +345,10 @@ impl Grammar {

/// Remove `Production` from the `Grammar`
pub fn remove_production(&mut self, prod: &Production) -> Option<Production> {
if let Some(pos) = self.productions.iter().position(|x| *x == *prod) {
Some(self.productions.remove(pos))
} else {
None
}
self.productions
.iter()
.position(|x| *x == *prod)
.map(|pos| self.productions.swap_remove(pos))
}

/// Get iterator of the `Grammar`'s `Production`s
Expand All @@ -361,6 +361,40 @@ impl Grammar {
self.productions.iter_mut()
}

/// Validate the `Grammar` has no undefined nonterminals
///
/// No need to call this method before building a parser, as the parser will validate the grammar at construction time.
///
/// # Errors
///
/// Returns `Error::ValidationError` if the grammar has no productions or has undefined nonterminals.
pub fn validate(&self) -> Result<(), Error> {
if self.productions.is_empty() {
return Err(Error::ValidationError(
"Grammar must have at least one production".to_string(),
));
}
let mut sets = crate::validation::NonterminalSets::new();
for production in self.productions_iter() {
if let Term::Nonterminal(nt) = &production.lhs {
sets.record_lhs(nt.as_str());
}
for expression in production.rhs_iter() {
for term in expression.terms_iter() {
if let Term::Nonterminal(nt) = term {
sets.record_rhs(nt.as_str());
}
}
}
}
if let Some(undefined) = sets.undefined().next() {
return Err(Error::ValidationError(format!(
"Undefined nonterminals: <{undefined}>"
)));
}
Ok(())
}

/// Build a reusable parser from this grammar, validating that all nonterminals are defined.
///
/// This method validates the grammar and creates a [`crate::GrammarParser`] that can be
Expand Down Expand Up @@ -412,7 +446,8 @@ impl Grammar {
&'gram self,
input: &'gram str,
) -> impl Iterator<Item = ParseTree<'gram>> {
crate::earley::parse(self, input)
let parser = Rc::new(crate::GrammarParser::new_unchecked(self));
crate::earley::parse_with_parser_rc(parser, input, None)
}

/// Parse input strings according to `Grammar`, starting with given production
Expand Down Expand Up @@ -440,7 +475,8 @@ impl Grammar {
input: &'gram str,
starting_term: &'gram Term,
) -> impl Iterator<Item = ParseTree<'gram>> {
crate::earley::parse_starting_with(self, input, starting_term)
let parser = Rc::new(crate::GrammarParser::new_unchecked(self));
crate::earley::parse_with_parser_rc(parser, input, Some(starting_term))
}

/// Get the starting term
Expand Down Expand Up @@ -938,6 +974,48 @@ mod tests {
);
}

#[test]
fn validate_fails_for_empty_grammar() {
    // Built from an empty production list, so validation has nothing to accept.
    let outcome = Grammar::from_parts(vec![]).validate();
    assert!(outcome.is_err(), "validate should fail for empty grammar");
    assert!(matches!(outcome, Err(Error::ValidationError(_))));
}

#[test]
fn validate_succeeds_for_valid_grammar() {
    // Both nonterminals used by <start> (<a> and <b>) have a rule of their own.
    let source = "<start> ::= <a> | <b>
<a> ::= 'a'
<b> ::= 'b'";
    let grammar = source.parse::<Grammar>().unwrap();
    assert!(grammar.validate().is_ok());
}

#[test]
fn validate_fails_for_undefined_nonterminal() {
    // <b> is used by <start> but no rule defines it.
    let source = "<start> ::= <a> | <b>
<a> ::= 'a'";
    let grammar = source.parse::<Grammar>().unwrap();
    let outcome = grammar.validate();
    assert!(outcome.is_err());
    assert!(matches!(outcome, Err(Error::ValidationError(_))));
}

#[test]
fn validate_error_message_contains_undefined_nonterminal() {
    // The only right-hand-side nonterminal has no rule, so it must be named in the error.
    let grammar = "<start> ::= <undefined_nt>".parse::<Grammar>().unwrap();
    match grammar.validate().unwrap_err() {
        Error::ValidationError(msg) => assert!(
            msg.contains("<undefined_nt>"),
            "message should mention undefined nonterminal: {msg}"
        ),
        _ => panic!("expected ValidationError"),
    }
}

#[test]
fn parse_error() {
let grammar: Result<Grammar, _> = "<almost_grammar> ::= <test".parse();
Expand Down Expand Up @@ -1026,7 +1104,7 @@ mod tests {
<c> ::= 'c'"
.parse()
.unwrap();
let valid: std::collections::HashSet<String> = ["a", "b", "ac", "bc"]
let valid: crate::HashSet<String> = ["a", "b", "ac", "bc"]
.into_iter()
.map(String::from)
.collect();
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod parsers;
mod production;
mod term;
mod tracing;
mod validation;
pub use crate::error::Error;
pub use crate::expression::Expression;
pub use crate::grammar::{Grammar, ParseTree, ParseTreeNode, escape_mermaid_label};
Expand All @@ -25,3 +26,4 @@ pub use parsers::ABNF;
pub use parsers::{BNF, Format};

pub(crate) use hashbrown::HashMap;
pub(crate) use hashbrown::HashSet;
Loading