Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions jaq-fmts/tests/formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn jc(json: &str, cbor_hex: &str) {

let cbor_bin = decode_hex(cbor_hex).unwrap();
//dbg!(&cbor_bin);
let cbor_val = unwrap_collect(read::cbor::parse_many(&*cbor_bin));
let cbor_val = unwrap_collect(read::cbor::parse_many(&cbor_bin));
assert_eq!(json_val, cbor_val);

let mut cbor_bin2 = Vec::new();
Expand All @@ -28,13 +28,13 @@ fn jc(json: &str, cbor_hex: &str) {
}
//dbg!(&cbor_bin2);

let cbor_val2 = unwrap_collect(read::cbor::parse_many(&*cbor_bin2));
let cbor_val2 = unwrap_collect(read::cbor::parse_many(&cbor_bin2));
assert_eq!(cbor_val, cbor_val2);
}

/// Assert that parsing the given hex-encoded CBOR input fails.
fn cbor_fail(cbor_hex: &str) {
    let cbor_bin = decode_hex(cbor_hex).unwrap();
    // `&cbor_bin` derefs from `&Vec<u8>` to `&[u8]`; no `&*` needed.
    let iter = read::cbor::parse_many(&cbor_bin);
    assert!(iter.collect::<Result<Vec<_>, _>>().is_err())
}

Expand Down
102 changes: 90 additions & 12 deletions jaq-json/src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,124 @@ fn ws_tk<L: Lex>(lexer: &mut L) -> Option<u8> {
}
}

/// Parse error.
/// Parse error with line and column information.
#[derive(Debug)]
pub struct Error(usize, hifijson::Error);
pub struct Error {
line: usize,
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would make a type LineCol = (usize, usize) and store that here, instead of having separate line and column fields. That would simplify the call sites of byte_offset_to_position below.

column: usize,
inner: hifijson::Error,
}

impl core::fmt::Display for Error {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "byte offset {}: {}", self.0, self.1)
write!(
f,
"at line {}, column {}: {}",
self.line, self.column, self.inner
)
}
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}

/// Convert a byte offset in a slice to a (line, column) pair (both 1-based).
/// Column counts Unicode characters, not bytes.
fn byte_offset_to_position(slice: &[u8], offset: usize) -> (usize, usize) {
    let before = &slice[..offset];
    // Lines are 1-based: one more than the number of newlines before the offset.
    let line = before.iter().filter(|&&byte| byte == b'\n').count() + 1;
    // The current line starts right after the last newline (or at the slice start).
    let last_newline = before.iter().rposition(|&byte| byte == b'\n');
    let line_start = last_newline.map_or(before, |pos| &before[pos + 1..]);
    // Count character starts: per RFC 3629 (UTF-8), continuation bytes have the
    // bit pattern 0b10xxxxxx, i.e. `byte & 0xC0 == 0x80`, so every byte NOT of
    // that form begins a new character.
    // TODO: replace with `!byte.is_utf8_continuation()` once stable.
    let column = line_start
        .iter()
        .filter(|&&byte| byte & 0xC0 != 0x80)
        .count()
        + 1;
    (line, column)
}

/// Parse exactly one JSON value.
///
/// On failure, the error carries the (line, column) position derived from
/// how far the lexer advanced into `slice`.
pub fn parse_single(slice: &[u8]) -> Result<Val, Error> {
    // Byte offset of the unconsumed rest within the original slice.
    let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize;
    let mut lexer = SliceLexer::new(slice);
    lexer.exactly_one(ws_tk, parse).map_err(|inner| {
        let (line, column) = byte_offset_to_position(slice, offset(lexer.as_slice()));
        Error {
            line,
            column,
            inner,
        }
    })
}

/// Parse a sequence of JSON values.
///
/// The iterator yields one `Result` per value; parse errors carry the
/// (line, column) position derived from how far the lexer advanced.
pub fn parse_many(slice: &[u8]) -> impl Iterator<Item = Result<Val, Error>> + '_ {
    // Byte offset of the unconsumed rest within the original slice.
    let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize;
    let mut lexer = SliceLexer::new(slice);
    core::iter::from_fn(move || {
        Some(parse(ws_tk(&mut lexer)?, &mut lexer).map_err(|inner| {
            let (line, column) = byte_offset_to_position(slice, offset(lexer.as_slice()));
            Error {
                line,
                column,
                inner,
            }
        }))
    })
}

#[cfg(feature = "std")]
/// Wrapper iterator that tracks line and column position as bytes flow through.
struct PositionTracker<I> {
    inner: I,
    position: alloc::rc::Rc<core::cell::Cell<(usize, usize)>>,
}

#[cfg(feature = "std")]
impl<I: Iterator<Item = io::Result<u8>>> Iterator for PositionTracker<I> {
    type Item = io::Result<u8>;

    fn next(&mut self) -> Option<Self::Item> {
        let item = self.inner.next()?;
        if let Ok(&byte) = item.as_ref() {
            let (line, column) = self.position.get();
            // TODO: replace with `!byte.is_utf8_continuation()` once stable.
            let updated = match byte {
                b'\n' => (line + 1, 0),
                b if b & 0xC0 != 0x80 => (line, column + 1),
                _ => (line, column),
            };
            self.position.set(updated);
        }
        Some(item)
    }
}

#[cfg(feature = "std")]
/// Read a sequence of JSON values.
///
/// Parse errors are reported as `io::ErrorKind::InvalidData` and carry the
/// (line, column) position tracked while bytes were consumed.
pub fn read_many<'a>(read: impl io::BufRead + 'a) -> impl Iterator<Item = io::Result<Val>> + 'a {
    let invalid_data = |error| io::Error::new(io::ErrorKind::InvalidData, error);
    // Shared cell: the tracker updates the current (line, column) as the lexer
    // pulls bytes; the error path below reads it.
    let position = alloc::rc::Rc::new(core::cell::Cell::new((1usize, 0usize)));
    let tracker = PositionTracker {
        inner: read.bytes(),
        position: alloc::rc::Rc::clone(&position),
    };
    let mut lexer = hifijson::IterLexer::new(tracker);
    core::iter::from_fn(move || {
        let value = ws_tk(&mut lexer).map(|next| {
            parse(next, &mut lexer).map_err(|inner| {
                let (line, column) = position.get();
                invalid_data(Error {
                    line,
                    column,
                    inner,
                })
            })
        });
        // Always return I/O error if present, regardless of the output value.
        lexer.error.take().map(Err).or(value)
    })
}

Expand Down
3 changes: 3 additions & 0 deletions jaq/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ log = { version = "0.4.17" }
mimalloc = { version = "0.1.29", default-features = false, optional = true }
rustyline = { version = "13.0.0", default-features = false, features = ["with-file-history"] }
tempfile = "3.3.0"

[dev-dependencies]
paste = "1"
77 changes: 72 additions & 5 deletions jaq/tests/golden.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
use std::{env, io, process, str};

fn golden_test(args: &[&str], input: &str, out_ex: &str) -> io::Result<()> {
/// Run `jaq` with the given args, pipe `input` to stdin, and compare output.
/// When `expect_success` is true, asserts success and checks stdout;
/// when false, asserts failure and checks stderr.
fn golden_test(args: &[&str], input: &str, expect_success: bool, out_ex: &str) -> io::Result<()> {
let mut child = process::Command::new(env!("CARGO_BIN_EXE_jaq"))
.args(args)
.stdin(process::Stdio::piped())
.stdout(process::Stdio::piped())
.stderr(process::Stdio::piped())
.spawn()?;

use io::Write;
child.stdin.take().unwrap().write_all(input.as_bytes())?;
let output = child.wait_with_output()?;
assert!(output.status.success());
assert_eq!(output.status.success(), expect_success);

let out_act = str::from_utf8(&output.stdout).expect("invalid UTF-8 in output");
// remove '\r' from output for compatibility with Windows
let out_act = if expect_success {
str::from_utf8(&output.stdout).expect("invalid UTF-8 in stdout")
} else {
str::from_utf8(&output.stderr).expect("invalid UTF-8 in stderr")
};
// Remove '\r' from output for compatibility with Windows.
let out_act = out_act.replace('\r', "");
if out_ex.trim() != out_act.trim() {
println!("Expected output:\n{}\n---", out_ex);
Expand All @@ -27,7 +35,47 @@ macro_rules! test {
($name:ident, $args:expr, $input:expr, $output:expr) => {
#[test]
fn $name() -> io::Result<()> {
golden_test($args, $input, $output)
golden_test($args, $input, true, $output)
}
};
}

/// Like `golden_test`, but writes input to a temp file and passes it as a file argument.
/// This exercises the slice-based parsing path (`parse_many` / `byte_offset_to_position`)
/// rather than the iterator-based path (`read_many` / `PositionTracker`).
fn golden_test_file(
    args: &[&str],
    input: &str,
    expect_success: bool,
    out_ex: &str,
) -> io::Result<()> {
    use io::Write;
    let mut file = tempfile::NamedTempFile::new()?;
    file.write_all(input.as_bytes())?;
    file.flush()?;

    // Borrow the path directly; no owned `String` copy is needed, because
    // `file` (and thus its path) outlives the `golden_test` call below.
    let path = file.path().to_str().expect("non-UTF-8 temp path");
    let mut all_args = args.to_vec();
    all_args.push(path);

    golden_test(&all_args, "", expect_success, out_ex)
}

/// Generates two tests: one via stdin (stream-based `PositionTracker` path)
/// and one via a temp file (slice-based `byte_offset_to_position` path).
/// Both expect `jaq` to fail and compare stderr against `$stderr`.
macro_rules! test_error {
    ($name:ident, $args:expr, $input:expr, $stderr:expr) => {
        paste::paste! {
            #[test]
            fn [<$name _stdin>]() -> io::Result<()> {
                golden_test($args, $input, false, $stderr)
            }

            #[test]
            fn [<$name _file>]() -> io::Result<()> {
                golden_test_file($args, $input, false, $stderr)
            }
        }
    };
}
Expand Down Expand Up @@ -183,3 +231,22 @@ test!(
"0",
r#"["bcddd",[1,2],3]"#
);

// Missing comma between object entries: the reported position should be the
// line and column of the unexpected key `"key4"`.
// NOTE(review): the expected error says column 5, which implies the object
// keys are indented by four spaces — verify the raw-string indentation
// survived copy/paste.
test_error!(
    parse_error_missing_comma,
    &["."],
    r#"{
"key1": 1,
"key2": "value",
"key3": false
"key4": null
}"#,
    "Error: failed to parse: at line 5, column 5: comma or end of sequence expected\n"
);

// Multi-byte UTF-8 keys: the column must count characters, not bytes.
// The stray `"b"` is the 11th character (a byte count would give 14).
test_error!(
    parse_error_utf8_column,
    &["."],
    r#"{"ąćę": 1 "b": 2}"#,
    "Error: failed to parse: at line 1, column 11: comma or end of sequence expected\n"
);