-
-
Notifications
You must be signed in to change notification settings - Fork 107
Add line and column information to errors #410
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
efee991
72dc4a5
845be91
4247f30
fc1b218
885955b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,46 +18,124 @@ fn ws_tk<L: Lex>(lexer: &mut L) -> Option<u8> { | |
| } | ||
| } | ||
|
|
||
| /// Parse error. | ||
| /// Parse error with line and column information. | ||
| #[derive(Debug)] | ||
| pub struct Error(usize, hifijson::Error); | ||
| pub struct Error { | ||
| line: usize, | ||
| column: usize, | ||
| inner: hifijson::Error, | ||
| } | ||
|
|
||
| impl core::fmt::Display for Error { | ||
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { | ||
| write!(f, "byte offset {}: {}", self.0, self.1) | ||
| write!( | ||
| f, | ||
| "at line {}, column {}: {}", | ||
| self.line, self.column, self.inner | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(feature = "std")] | ||
| impl std::error::Error for Error {} | ||
|
|
||
| /// Convert a byte offset in a slice to a (line, column) pair (both 1-based). | ||
| /// Column counts Unicode characters, not bytes. | ||
| fn byte_offset_to_position(slice: &[u8], offset: usize) -> (usize, usize) { | ||
| let before = &slice[..offset]; | ||
| let line = before.iter().filter(|&&byte| byte == b'\n').count() + 1; | ||
| let last_newline = before.iter().rposition(|&byte| byte == b'\n'); | ||
| let line_start = last_newline.map_or(before, |pos| &before[pos + 1..]); | ||
| // TODO: replace with `!byte.is_utf8_continuation()` once stable. | ||
| let column = line_start | ||
| .iter() | ||
| .filter(|&&byte| byte & 0xC0 != 0x80) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a source to this magic trick? |
||
| .count() | ||
| + 1; | ||
| (line, column) | ||
| } | ||
|
|
||
| /// Parse exactly one JSON value. | ||
| pub fn parse_single(slice: &[u8]) -> Result<Val, Error> { | ||
| let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize; | ||
| let mut lexer = SliceLexer::new(slice); | ||
| lexer | ||
| .exactly_one(ws_tk, parse) | ||
| .map_err(|e| Error(offset(lexer.as_slice()), e)) | ||
| lexer.exactly_one(ws_tk, parse).map_err(|inner| { | ||
| let (line, column) = byte_offset_to_position(slice, offset(lexer.as_slice())); | ||
| Error { | ||
| line, | ||
| column, | ||
| inner, | ||
| } | ||
| }) | ||
| } | ||
|
|
||
| /// Parse a sequence of JSON values. | ||
| pub fn parse_many(slice: &[u8]) -> impl Iterator<Item = Result<Val, Error>> + '_ { | ||
| let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize; | ||
| let mut lexer = SliceLexer::new(slice); | ||
| core::iter::from_fn(move || { | ||
| Some(parse(ws_tk(&mut lexer)?, &mut lexer).map_err(|e| Error(offset(lexer.as_slice()), e))) | ||
| Some(parse(ws_tk(&mut lexer)?, &mut lexer).map_err(|inner| { | ||
| let (line, column) = byte_offset_to_position(slice, offset(lexer.as_slice())); | ||
| Error { | ||
| line, | ||
| column, | ||
| inner, | ||
| } | ||
| })) | ||
| }) | ||
| } | ||
|
|
||
| #[cfg(feature = "std")] | ||
| /// Wrapper iterator that tracks line and column position as bytes flow through. | ||
| struct PositionTracker<I> { | ||
| inner: I, | ||
| position: alloc::rc::Rc<core::cell::Cell<(usize, usize)>>, | ||
| } | ||
|
|
||
| #[cfg(feature = "std")] | ||
| impl<I: Iterator<Item = io::Result<u8>>> Iterator for PositionTracker<I> { | ||
| type Item = io::Result<u8>; | ||
|
|
||
| fn next(&mut self) -> Option<Self::Item> { | ||
| let result = self.inner.next()?; | ||
| if let Ok(byte) = &result { | ||
| let (mut line, mut column) = self.position.get(); | ||
| if *byte == b'\n' { | ||
| line += 1; | ||
| column = 0; | ||
| } else if *byte & 0xC0 != 0x80 { | ||
| // TODO: replace with `!byte.is_utf8_continuation()` once stable. | ||
| column += 1; | ||
| } | ||
| self.position.set((line, column)); | ||
| } | ||
| Some(result) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(feature = "std")] | ||
| /// Read a sequence of JSON values. | ||
| pub fn read_many<'a>(read: impl io::BufRead + 'a) -> impl Iterator<Item = io::Result<Val>> + 'a { | ||
| let invalid_data = |e| io::Error::new(io::ErrorKind::InvalidData, e); | ||
| let mut lexer = hifijson::IterLexer::new(read.bytes()); | ||
| let invalid_data = |error| io::Error::new(io::ErrorKind::InvalidData, error); | ||
| let position = alloc::rc::Rc::new(core::cell::Cell::new((1usize, 0usize))); | ||
| let tracker = PositionTracker { | ||
| inner: read.bytes(), | ||
| position: alloc::rc::Rc::clone(&position), | ||
| }; | ||
| let mut lexer = hifijson::IterLexer::new(tracker); | ||
| core::iter::from_fn(move || { | ||
| let v = ws_tk(&mut lexer).map(|next| parse(next, &mut lexer).map_err(invalid_data)); | ||
| // always return I/O error if present, regardless of the output value! | ||
| lexer.error.take().map(Err).or(v) | ||
| let value = ws_tk(&mut lexer).map(|next| { | ||
| parse(next, &mut lexer).map_err(|inner| { | ||
| let (line, column) = position.get(); | ||
| invalid_data(Error { | ||
| line, | ||
| column, | ||
| inner, | ||
| }) | ||
| }) | ||
| }); | ||
| // Always return I/O error if present, regardless of the output value. | ||
| lexer.error.take().map(Err).or(value) | ||
| }) | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,19 +1,27 @@ | ||
| use std::{env, io, process, str}; | ||
|
|
||
| fn golden_test(args: &[&str], input: &str, out_ex: &str) -> io::Result<()> { | ||
| /// Run `jaq` with the given args, pipe `input` to stdin, and compare output. | ||
| /// When `expect_success` is true, asserts success and checks stdout; | ||
| /// when false, asserts failure and checks stderr. | ||
| fn golden_test(args: &[&str], input: &str, expect_success: bool, out_ex: &str) -> io::Result<()> { | ||
| let mut child = process::Command::new(env!("CARGO_BIN_EXE_jaq")) | ||
| .args(args) | ||
| .stdin(process::Stdio::piped()) | ||
| .stdout(process::Stdio::piped()) | ||
| .stderr(process::Stdio::piped()) | ||
| .spawn()?; | ||
|
|
||
| use io::Write; | ||
| child.stdin.take().unwrap().write_all(input.as_bytes())?; | ||
| let output = child.wait_with_output()?; | ||
| assert!(output.status.success()); | ||
| assert_eq!(output.status.success(), expect_success); | ||
|
|
||
| let out_act = str::from_utf8(&output.stdout).expect("invalid UTF-8 in output"); | ||
| // remove '\r' from output for compatibility with Windows | ||
| let out_act = if expect_success { | ||
| str::from_utf8(&output.stdout).expect("invalid UTF-8 in stdout") | ||
| } else { | ||
| str::from_utf8(&output.stderr).expect("invalid UTF-8 in stderr") | ||
| }; | ||
| // Remove '\r' from output for compatibility with Windows. | ||
| let out_act = out_act.replace('\r', ""); | ||
| if out_ex.trim() != out_act.trim() { | ||
| println!("Expected output:\n{}\n---", out_ex); | ||
|
|
@@ -27,7 +35,47 @@ macro_rules! test { | |
| ($name:ident, $args:expr, $input:expr, $output:expr) => { | ||
| #[test] | ||
| fn $name() -> io::Result<()> { | ||
| golden_test($args, $input, $output) | ||
| golden_test($args, $input, true, $output) | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| /// Like `golden_test`, but writes input to a temp file and passes it as a file argument. | ||
| /// This exercises the slice-based parsing path (`parse_many` / `byte_offset_to_position`) | ||
| /// rather than the iterator-based path (`read_many` / `PositionTracker`). | ||
| fn golden_test_file( | ||
| args: &[&str], | ||
| input: &str, | ||
| expect_success: bool, | ||
| out_ex: &str, | ||
| ) -> io::Result<()> { | ||
| use io::Write; | ||
| let mut file = tempfile::NamedTempFile::new()?; | ||
| file.write_all(input.as_bytes())?; | ||
| file.flush()?; | ||
|
|
||
| let path = file.path().to_str().expect("non-UTF-8 temp path"); | ||
| let path: String = path.to_string(); | ||
| let mut all_args: Vec<&str> = args.to_vec(); | ||
| all_args.push(&path); | ||
|
|
||
| golden_test(&all_args, "", expect_success, out_ex) | ||
| } | ||
|
|
||
| /// Generates two tests: one via stdin (stream-based `PositionTracker` path) | ||
| /// and one via a temp file (slice-based `byte_offset_to_position` path). | ||
| macro_rules! test_error { | ||
| ($name:ident, $args:expr, $input:expr, $stderr:expr) => { | ||
| paste::paste! { | ||
| #[test] | ||
| fn [<$name _stdin>]() -> io::Result<()> { | ||
| golden_test($args, $input, false, $stderr) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would actually just add |
||
| } | ||
|
|
||
| #[test] | ||
| fn [<$name _file>]() -> io::Result<()> { | ||
| golden_test_file($args, $input, false, $stderr) | ||
| } | ||
| } | ||
| }; | ||
| } | ||
|
|
@@ -183,3 +231,22 @@ test!( | |
| "0", | ||
| r#"["bcddd",[1,2],3]"# | ||
| ); | ||
|
|
||
| test_error!( | ||
| parse_error_missing_comma, | ||
| &["."], | ||
| r#"{ | ||
| "key1": 1, | ||
| "key2": "value", | ||
| "key3": false | ||
| "key4": null | ||
| }"#, | ||
| "Error: failed to parse: at line 5, column 5: comma or end of sequence expected\n" | ||
| ); | ||
|
|
||
| test_error!( | ||
| parse_error_utf8_column, | ||
| &["."], | ||
| r#"{"ąćę": 1 "b": 2}"#, | ||
| "Error: failed to parse: at line 1, column 11: comma or end of sequence expected\n" | ||
| ); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would make a type
LineCol = (usize, usize)and store that here, instead of having separate line and column fields. That would simplify the call sites ofbyte_offset_to_positionbelow.