From be354073054128046e8bea7d80a2420f0282aa67 Mon Sep 17 00:00:00 2001 From: matt rice Date: Wed, 14 May 2025 18:30:04 -0700 Subject: [PATCH 1/2] Fix a panic uncovered during testing `occupied_entry()` is documented as being for a specific use case of mutating the flags for the *key*, using `occupied_entry()` with `insert` runs afoul of this by assuming it can also return a value previously associated with the key. This is a case where the header's valueless key API doesn't coincide very well with the existing `std::collections` entry API. In a sense `Header` can be occupied with a key *without* any associated value. While for the `std::collections` entry API the existence of a key implies the existence of a value. This fixes the problematic usage of the API, but it would be good to revisit the non_standard `occupied_entry()` function, and try to make another API that is more misuse resistent such as perhaps `key_entry` which allows you to modify the flags associated with a key, but doesn't assume the existence of a value. --- nimbleparse/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index 3e4cd3410..e2bfdcebb 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -141,12 +141,12 @@ fn main() { }; let entry = match header.entry("yacckind".to_string()) { Entry::Occupied(_) => unreachable!("Header should be empty"), - Entry::Vacant(v) => v.occupied_entry(), + Entry::Vacant(v) => v, }; match matches.opt_str("y") { None => {} Some(s) => { - entry.insert(HeaderValue( + entry.insert_entry(HeaderValue( Location::CommandLine, Value::try_from(match &*s.to_lowercase() { "eco" => YaccKind::Eco, From 6912702c65148569b8c5e81385706b317e54d872 Mon Sep 17 00:00:00 2001 From: matt rice Date: Sun, 11 May 2025 15:28:33 -0700 Subject: [PATCH 2/2] Add line/column info to nimbleparses when printing `HeaderError`. Move lazy initialisation to using `OnceCell` inside the formatter. Restore some spanned error printing that was lost in commit 277ae03622cb956455d6e98ee5e7efa4f71b3855 when `Location` was still within `YaccGrammarError`. --- cfgrammar/src/lib/header.rs | 11 ++++- nimbleparse/src/diagnostics.rs | 56 ++++++++++++++----------- nimbleparse/src/main.rs | 75 ++++++++++++++++++---------------- 3 files changed, 82 insertions(+), 60 deletions(-) diff --git a/cfgrammar/src/lib/header.rs b/cfgrammar/src/lib/header.rs index c89f5c762..d591596be 100644 --- a/cfgrammar/src/lib/header.rs +++ b/cfgrammar/src/lib/header.rs @@ -3,7 +3,7 @@ use crate::{ yacc::{ parser::SpansKind, YaccGrammarError, YaccGrammarErrorKind, YaccKind, YaccOriginalActionKind, }, - Location, Span, + Location, Span, Spanned, }; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; @@ -38,6 +38,15 @@ impl From> for YaccGrammarError { } } +impl Spanned for HeaderError { + fn spans(&self) -> &[Span] { + self.locations.as_slice() + } + fn spanskind(&self) -> SpansKind { + self.spanskind() + } +} + // This is essentially a tuple that needs a newtype so we can implement `From` for it. // Thus we aren't worried about it being `pub`. #[derive(Debug, PartialEq)] diff --git a/nimbleparse/src/diagnostics.rs b/nimbleparse/src/diagnostics.rs index 0888b7b70..024934f95 100644 --- a/nimbleparse/src/diagnostics.rs +++ b/nimbleparse/src/diagnostics.rs @@ -1,4 +1,4 @@ -use std::{error::Error, fmt::Display, path::Path, str::FromStr}; +use std::{cell::OnceCell, error::Error, fmt::Display, path::Path}; use cfgrammar::{ newlinecache::NewlineCache, @@ -16,7 +16,7 @@ use unicode_width::UnicodeWidthStr; pub struct SpannedDiagnosticFormatter<'a> { src: &'a str, path: &'a Path, - nlc: NewlineCache, + nlc: OnceCell, } fn pidx_prods_data(ast: &GrammarAST, pidx: PIdx) -> (Vec, Vec) @@ -36,11 +36,19 @@ where impl<'a> SpannedDiagnosticFormatter<'a> { #[allow(clippy::result_unit_err)] - pub fn new(src: &'a str, path: &'a Path) -> Result { - Ok(Self { + pub fn new(src: &'a str, path: &'a Path) -> Self { + Self { src, path, - nlc: NewlineCache::from_str(src)?, + nlc: OnceCell::new(), + } + } + + pub fn nlc(&self) -> &NewlineCache { + self.nlc.get_or_init(|| { + let mut nlc = NewlineCache::new(); + nlc.feed(self.src); + nlc }) } @@ -60,7 +68,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { pub fn file_location_msg(&self, msg: &str, span: Option) -> String { if let Some(span) = span { let (line, col) = self - .nlc + .nlc() .byte_to_line_num_and_col_num(self.src, span.start()) .unwrap_or((0, 0)); format!("{} at {}:{line}:{col}", msg, self.path.display()) @@ -85,11 +93,11 @@ impl<'a> SpannedDiagnosticFormatter<'a> { underline_c: char, ) -> String { let mut out = String::new(); - let (start_byte, end_byte) = self.nlc.span_line_bytes(span); + let (start_byte, end_byte) = self.nlc().span_line_bytes(span); // Produce an underline underneath a span which may cover multiple lines, and a message on the last line. let mut source_lines = self.src[start_byte..end_byte].lines().peekable(); while let Some(source_line) = source_lines.next() { - let (line_start_byte, _) = self.nlc.span_line_bytes(span); + let (line_start_byte, _) = self.nlc().span_line_bytes(span); let span_offset_from_start = span.start() - line_start_byte; // An underline bounded by the current line. @@ -99,7 +107,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { .min(span.start() + (source_line.len() - span_offset_from_start)), ); let (line_num, _) = self - .nlc + .nlc() .byte_to_line_num_and_col_num(self.src, span.start()) .expect("Span must correlate to a line in source"); // Print the line_num/source text for the line. @@ -137,13 +145,13 @@ impl<'a> SpannedDiagnosticFormatter<'a> { let mut spans = e.spans().iter().enumerate().peekable(); while let Some((span_num, span)) = spans.next() { let (line, _) = self - .nlc + .nlc() .byte_to_line_num_and_col_num(self.src, span.start()) .unwrap_or((0, 0)); let next_line = spans .peek() .map(|(_, span)| span) - .map(|s| self.nlc.byte_to_line_num(s.start()).unwrap_or(line)) + .map(|s| self.nlc().byte_to_line_num(s.start()).unwrap_or(line)) .unwrap_or(line); // Is this line contiguous with the next, if not prefix it with dots. let dots = if next_line - line > 1 { "..." } else { "" }; @@ -180,7 +188,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { ) -> String { let mut lines = spans .clone() - .map(|span| self.nlc.span_line_bytes(span)) + .map(|span| self.nlc().span_line_bytes(span)) .collect::>(); lines.dedup(); assert!(lines.len() == 1); @@ -192,10 +200,10 @@ impl<'a> SpannedDiagnosticFormatter<'a> { // ____ ___ // indent indent let (line_at_start, _) = self - .nlc + .nlc() .byte_to_line_num_and_col_num(self.src, first_span.start()) .expect("Span should correlate to a line in source"); - let (line_start_byte, end_byte) = self.nlc.span_line_bytes(*first_span); + let (line_start_byte, end_byte) = self.nlc().span_line_bytes(*first_span); // Print the line_num/source text for the line. out.push_str(&format!( "{}| {}\n", @@ -304,7 +312,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { let mut prod_lines = r_prod_spans .iter() - .map(|span| self.nlc.span_line_bytes(*span)) + .map(|span| self.nlc().span_line_bytes(*span)) .collect::>(); prod_lines.sort(); prod_lines.dedup(); @@ -312,7 +320,7 @@ impl<'a> SpannedDiagnosticFormatter<'a> { for lines in &prod_lines { let mut spans_on_line = Vec::new(); for span in &r_prod_spans { - if lines == &self.nlc.span_line_bytes(*span) { + if lines == &self.nlc().span_line_bytes(*span) { spans_on_line.push(*span) } } @@ -375,7 +383,7 @@ mod test { fn underline_multiline_span_test() { let s = "\naaaaaabbb\nbbb\nbbbb\n"; let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(s, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(s, &test_path); let span = Span::new(7, 7 + 12); let out = format!( @@ -413,7 +421,7 @@ mod test { fn underline_single_line_span_test() { let s = "\naaaaaabbb bbb bbbb\n"; let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(s, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(s, &test_path); let span = Span::new(7, 7 + 12); let out = format!( @@ -442,7 +450,7 @@ mod test { fn span_prefix() { let s = "\naaaaaabbb\nbbb\nbbbb\n"; let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(s, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(s, &test_path); // For raw string alignment. let mut out = String::from("\n"); // On occasion we want dots to signal that the lines are not contiguous. @@ -473,7 +481,7 @@ mod test { fn span_prefix_2() { let s = "\n\n\n\n\n\n\n\n\n\n\naaaaaabbb\nbbb\nbbbb\n"; let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(s, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(s, &test_path); let mut out = String::from("\n"); // On occasion we want dots to signal that the lines are not contiguous. out.push_str(&formatter.prefixed_underline_span_with_text( @@ -504,7 +512,7 @@ mod test { let crabs = " 🦀🦀🦀 "; let crustaceans = format!("\"{crabs}\n{crabs}\""); let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path); // For raw string alignment. let mut out = String::from("\n"); out.push_str(&formatter.underline_span_with_text( @@ -528,7 +536,7 @@ mod test { let lobster = "🦞"; let crustaceans = format!("{crab}{lobster}{crab}{crab}{lobster}"); let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path); // For raw string alignment. let mut out = String::from("\n"); out.push_str(&formatter.prefixed_underline_span_with_text( @@ -560,7 +568,7 @@ mod test { fn underline_single_line_spans_test() { let s = "\naaaaaabbb bbb bbbb\n"; let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(s, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(s, &test_path); let spans = [(7, 10), (11, 14), (15, 19)] .iter() .map(|(i, j): &(usize, usize)| Span::new(*i, *j)); @@ -617,7 +625,7 @@ mod test { let lobster = "🦞"; let crustaceans = format!("{crab}{lobster}{crab}{lobster}"); let test_path = PathBuf::from("test"); - let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path).unwrap(); + let formatter = SpannedDiagnosticFormatter::new(&crustaceans, &test_path); // For raw string alignment. let mut out = String::from("\n"); let spans = [ diff --git a/nimbleparse/src/main.rs b/nimbleparse/src/main.rs index e2bfdcebb..17ddc5581 100644 --- a/nimbleparse/src/main.rs +++ b/nimbleparse/src/main.rs @@ -1,7 +1,7 @@ mod diagnostics; use crate::diagnostics::*; use cfgrammar::{ - header::{GrmtoolsSectionParser, Header, HeaderValue, Value}, + header::{GrmtoolsSectionParser, Header, HeaderError, HeaderValue, Value}, markmap::Entry, yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind}, Location, Span, @@ -165,13 +165,13 @@ fn main() { let lex_l_path = PathBuf::from(&matches.free[0]); let lex_src = read_file(&lex_l_path); + let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, &lex_l_path); let mut lexerdef = match LRNonStreamingLexerDef::>::from_str(&lex_src) { Ok(ast) => ast, Err(errs) => { - let formatter = SpannedDiagnosticFormatter::new(&lex_src, &lex_l_path).unwrap(); - eprintln!("{ERROR}{}", formatter.file_location_msg("", None)); + eprintln!("{ERROR}{}", lex_diag.file_location_msg("", None)); for e in errs { - eprintln!("{}", indent(&formatter.format_error(e).to_string(), " ")); + eprintln!("{}", indent(&lex_diag.format_error(e).to_string(), " ")); } process::exit(1); } @@ -179,6 +179,7 @@ fn main() { let yacc_y_path = PathBuf::from(&matches.free[1]); let yacc_src = read_file(&yacc_y_path); + let yacc_diag = SpannedDiagnosticFormatter::new(&yacc_src, &yacc_y_path); let yk_val = header.get("yacckind"); if yk_val.is_none() { let parsed_header = GrmtoolsSectionParser::new(&yacc_src, true).parse(); @@ -190,12 +191,12 @@ fn main() { } Err(errs) => { eprintln!( - "Error(s) parsing `%grmtools` section:\n {}\n", - errs.iter() - .map(|e| e.to_string()) - .collect::>() - .join("\n") + "{ERROR}{}", + yacc_diag.file_location_msg(" parsing the `%grmtools` section:", None) ); + for e in errs { + eprintln!("{}", indent(&yacc_diag.format_error(e).to_string(), " ")); + } std::process::exit(1); } } @@ -211,7 +212,25 @@ fn main() { let recoverykind = if let Some(HeaderValue(_, rk_val)) = header.get("recoverer") { match RecoveryKind::try_from(rk_val) { Err(e) => { - eprintln!("{e}"); + eprintln!( + "{ERROR}{}", + yacc_diag.file_location_msg(" parsing the `%grmtools` section:", None) + ); + let spanned_e: HeaderError = HeaderError { + kind: e.kind, + locations: e + .locations + .iter() + .map(|l| match l { + Location::Span(span) => *span, + _ => unreachable!("All reachable errors should contain spans"), + }) + .collect::>(), + }; + eprintln!( + "{}", + indent(&yacc_diag.format_error(spanned_e).to_string(), " ") + ); process::exit(1) } Ok(rk) => rk, @@ -222,28 +241,24 @@ fn main() { }; let warnings = ast_validation.ast().warnings(); let res = YaccGrammar::new_from_ast_with_validity_info(&ast_validation); - let mut yacc_diagnostic_formatter: Option = None; let grm = match res { Ok(x) => { if !warnings.is_empty() { - let formatter = SpannedDiagnosticFormatter::new(&yacc_src, &yacc_y_path).unwrap(); - eprintln!("{WARNING}{}", formatter.file_location_msg("", None)); + eprintln!("{WARNING}{}", yacc_diag.file_location_msg("", None)); for w in warnings { - eprintln!("{}", indent(&formatter.format_warning(w), " ")); + eprintln!("{}", indent(&yacc_diag.format_warning(w), " ")); } - yacc_diagnostic_formatter = Some(formatter); } x } Err(errs) => { - let formatter = SpannedDiagnosticFormatter::new(&yacc_src, &yacc_y_path).unwrap(); - eprintln!("{ERROR}{}", formatter.file_location_msg("", None)); + eprintln!("{ERROR}{}", yacc_diag.file_location_msg("", None)); for e in errs { - eprintln!("{}", indent(&e.to_string(), " ")); + eprintln!("{}", indent(&yacc_diag.format_error(e).to_string(), " ")); } - eprintln!("{WARNING}{}", formatter.file_location_msg("", None)); + eprintln!("{WARNING}{}", yacc_diag.file_location_msg("", None)); for w in warnings { - eprintln!("{}", indent(&formatter.format_warning(w), " ")); + eprintln!("{}", indent(&yacc_diag.format_warning(w), " ")); } process::exit(1); } @@ -262,14 +277,6 @@ fn main() { if !quiet { if let Some(c) = stable.conflicts() { - let formatter = if let Some(yacc_diagnostic_formatter) = &yacc_diagnostic_formatter { - yacc_diagnostic_formatter - } else { - let formatter = SpannedDiagnosticFormatter::new(&yacc_src, &yacc_y_path).unwrap(); - yacc_diagnostic_formatter = Some(formatter); - yacc_diagnostic_formatter.as_ref().unwrap() - }; - let pp_rr = if let Some(i) = grm.expectrr() { i != c.rr_len() } else { @@ -289,7 +296,7 @@ fn main() { if (pp_rr || pp_sr) && !dump_state_graph { println!("Stategraph:\n{}\n", sgraph.pp_core_states(&grm)); } - formatter.handle_conflicts::>( + yacc_diag.handle_conflicts::>( &grm, ast_validation.ast(), c, @@ -303,19 +310,18 @@ fn main() { { if !quiet { if let Some(token_spans) = missing_from_lexer { - let formatter = SpannedDiagnosticFormatter::new(&yacc_src, &yacc_y_path).unwrap(); let warn_indent = " ".repeat(WARNING.len()); eprintln!( "{WARNING} these tokens are not referenced in the lexer but defined as follows" ); eprintln!( "{warn_indent} {}", - formatter.file_location_msg("in the grammar", None) + yacc_diag.file_location_msg("in the grammar", None) ); for span in token_spans { eprintln!( "{}", - formatter.underline_span_with_text( + yacc_diag.underline_span_with_text( span, "Missing from lexer".to_string(), '^' @@ -325,19 +331,18 @@ fn main() { eprintln!(); } if let Some(token_spans) = missing_from_parser { - let formatter = SpannedDiagnosticFormatter::new(&lex_src, &lex_l_path).unwrap(); let err_indent = " ".repeat(ERROR.len()); eprintln!( "{ERROR} these tokens are not referenced in the grammar but defined as follows" ); eprintln!( "{err_indent} {}", - formatter.file_location_msg("in the lexer", None) + lex_diag.file_location_msg("in the lexer", None) ); for span in token_spans { eprintln!( "{}", - formatter.underline_span_with_text( + lex_diag.underline_span_with_text( span, "Missing from parser".to_string(), '^'