Skip to content

Commit 474eb20

Browse files
committed
Make fromjson/fromcbor/... return a stream of values.
Also move named filters from jaq-json to their own module.
1 parent 4955ed5 commit 474eb20

File tree

9 files changed

+320
-276
lines changed

9 files changed

+320
-276
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

jaq-json/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ indexmap = { version = "2.0", default-features = false }
3333
num-bigint = { version = "0.4.6", default-features = false }
3434
num-traits = { version = "0.2.19", default-features = false }
3535
ryu = "1.0.20"
36+
self_cell = "1.2.0"
3637
serde_json = { version = "1.0.81", default-features = false, features = ["alloc"], optional = true }
3738

3839
# parsers

jaq-json/src/cbor.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,6 @@ impl From<PError<io::Error>> for io::Error {
7474
}
7575
}
7676

77-
/// Parse exactly one CBOR value.
78-
pub fn parse_single(b: &[u8]) -> Result<Val, BoxError> {
79-
parse_one(b).map_err(Into::into)
80-
}
81-
8277
/// Parse a sequence of CBOR values.
8378
pub fn parse_many(b: &[u8]) -> impl Iterator<Item = Result<Val, BoxError>> + '_ {
8479
decode_many(b).map(|r| r.map_err(Into::into))
@@ -94,13 +89,6 @@ pub fn write(w: &mut dyn io::Write, v: &Val) -> io::Result<()> {
9489
write_one(v, w)
9590
}
9691

97-
/// Decode a single CBOR value.
98-
fn parse_one<R: Read>(read: R) -> Result<Val, PError<R::Error>> {
99-
let mut decoder = Decoder::from(read);
100-
let header = decoder.pull()?;
101-
parse(header, &mut decoder)
102-
}
103-
10492
/// Decode a sequence of CBOR values.
10593
fn decode_many<R: Read>(read: R) -> impl Iterator<Item = Result<Val, PError<R::Error>>>
10694
where

jaq-json/src/funs.rs

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
use crate::{cbor, json, toml, xml, yaml};
2+
use crate::{Error, Num, Tag, Type, Val, ValR, ValX};
3+
use alloc::{boxed::Box, string::ToString, vec::Vec};
4+
use bstr::ByteSlice;
5+
use bytes::{BufMut, Bytes, BytesMut};
6+
use core::fmt;
7+
use jaq_core::box_iter::{box_once, then, BoxIter};
8+
use jaq_core::{DataT, Exn, Native, RunPtr};
9+
use jaq_std::{run, unary, v, Filter, ValT as _};
10+
11+
impl Val {
12+
/// Return 0 for null, the absolute value for numbers, and
13+
/// the length for strings, arrays, and objects.
14+
///
15+
/// Fail on booleans.
16+
fn length(&self) -> ValR {
17+
match self {
18+
Val::Null => Ok(Val::from(0usize)),
19+
Val::Num(n) => Ok(Val::Num(n.length())),
20+
Val::Str(s, Tag::Utf8) => Ok(Val::from(s.chars().count() as isize)),
21+
Val::Str(b, Tag::Bytes | Tag::Raw) => Ok(Val::from(b.len() as isize)),
22+
Val::Arr(a) => Ok(Val::from(a.len() as isize)),
23+
Val::Obj(o) => Ok(Val::from(o.len() as isize)),
24+
Val::Bool(_) => Err(Error::str(format_args!("{self} has no length"))),
25+
}
26+
}
27+
28+
/// Return the indices of `y` in `self`.
29+
fn indices<'a>(&'a self, y: &'a Val) -> Result<Box<dyn Iterator<Item = usize> + 'a>, Error> {
30+
match (self, y) {
31+
(Val::Str(_, tag @ (Tag::Bytes | Tag::Utf8)), Val::Str(y, tag_))
32+
if tag == tag_ && y.is_empty() =>
33+
{
34+
Ok(Box::new(core::iter::empty()))
35+
}
36+
(Val::Arr(_), Val::Arr(y)) if y.is_empty() => Ok(Box::new(core::iter::empty())),
37+
(Val::Str(x, Tag::Utf8), Val::Str(y, Tag::Utf8)) => {
38+
let index = |(i, _, _)| x.get(i..i + y.len());
39+
let iw = x.char_indices().map_while(index).enumerate();
40+
Ok(Box::new(iw.filter_map(|(i, w)| (w == *y).then_some(i))))
41+
}
42+
(Val::Str(x, tag @ Tag::Bytes), Val::Str(y, tag_)) if tag == tag_ => {
43+
let iw = x.windows(y.len()).enumerate();
44+
Ok(Box::new(iw.filter_map(|(i, w)| (w == *y).then_some(i))))
45+
}
46+
(Val::Arr(x), Val::Arr(y)) => {
47+
let iw = x.windows(y.len()).enumerate();
48+
Ok(Box::new(iw.filter_map(|(i, w)| (w == **y).then_some(i))))
49+
}
50+
(Val::Arr(x), y) => {
51+
let ix = x.iter().enumerate();
52+
Ok(Box::new(ix.filter_map(move |(i, x)| (x == y).then_some(i))))
53+
}
54+
(x, y) => Err(Error::index(x.clone(), y.clone())),
55+
}
56+
}
57+
58+
/// Return true if `value | .[key]` is defined.
59+
///
60+
/// Fail on values that are neither binaries, arrays nor objects.
61+
fn has(&self, key: &Self) -> Result<bool, Error> {
62+
match (self, key) {
63+
(Self::Str(a, Tag::Bytes), Self::Num(Num::Int(i))) if *i >= 0 => {
64+
Ok((*i as usize) < a.len())
65+
}
66+
(Self::Arr(a), Self::Num(Num::Int(i))) if *i >= 0 => Ok((*i as usize) < a.len()),
67+
(a @ (Self::Str(_, Tag::Bytes) | Self::Arr(_)), Self::Num(Num::BigInt(i))) => {
68+
a.has(&Self::from(crate::bigint_to_int_saturated(i)))
69+
}
70+
(Self::Obj(o), k) => Ok(o.contains_key(k)),
71+
_ => Err(Error::index(self.clone(), key.clone())),
72+
}
73+
}
74+
75+
/// `a` contains `b` iff either
76+
/// * the string `b` is a substring of `a`,
77+
/// * every element in the array `b` is contained in some element of the array `a`,
78+
/// * for every key-value pair `k, v` in `b`,
79+
/// there is a key-value pair `k, v'` in `a` such that `v'` contains `v`, or
80+
/// * `a` equals `b`.
81+
fn contains(&self, other: &Self) -> bool {
82+
match (self, other) {
83+
(Self::Str(l, tag), Self::Str(r, tag_)) if tag == tag_ => l.contains_str(r),
84+
(Self::Arr(l), Self::Arr(r)) => r.iter().all(|r| l.iter().any(|l| l.contains(r))),
85+
(Self::Obj(l), Self::Obj(r)) => r
86+
.iter()
87+
.all(|(k, r)| l.get(k).is_some_and(|l| l.contains(r))),
88+
_ => self == other,
89+
}
90+
}
91+
92+
fn to_bytes(&self) -> Result<Bytes, Self> {
93+
match self {
94+
Val::Num(n) => n
95+
.as_isize()
96+
.and_then(|i| u8::try_from(i).ok())
97+
.map(|u| Bytes::from(Vec::from([u])))
98+
.ok_or_else(|| self.clone()),
99+
Val::Str(b, _) => Ok(b.clone()),
100+
Val::Arr(a) => {
101+
let mut buf = BytesMut::new();
102+
for x in a.iter() {
103+
buf.put(Val::to_bytes(x)?);
104+
}
105+
Ok(buf.into())
106+
}
107+
_ => Err(self.clone()),
108+
}
109+
}
110+
111+
fn as_bytes_owned(&self) -> Option<Bytes> {
112+
if let Self::Str(b, _) = self {
113+
Some(b.clone())
114+
} else {
115+
None
116+
}
117+
}
118+
119+
fn as_utf8_bytes_owned(&self) -> Option<Bytes> {
120+
self.is_utf8_str().then(|| self.as_bytes_owned()).flatten()
121+
}
122+
123+
fn try_as_bytes_owned(&self) -> Result<Bytes, Error> {
124+
self.as_bytes_owned()
125+
.ok_or_else(|| Error::typ(self.clone(), Type::Str.as_str()))
126+
}
127+
128+
fn try_as_utf8_bytes_owned(&self) -> Result<Bytes, Error> {
129+
self.as_utf8_bytes_owned()
130+
.ok_or_else(|| Error::typ(self.clone(), Type::Str.as_str()))
131+
}
132+
}
133+
134+
/// Box Once, Map Error.
135+
fn bome<'a>(r: ValR) -> BoxIter<'a, ValX> {
136+
box_once(r.map_err(Exn::from))
137+
}
138+
139+
/// Box Map, Map Error.
140+
fn bmme<'a>(iter: BoxIter<'a, ValR>) -> BoxIter<'a, ValX> {
141+
Box::new(iter.map(|r| r.map_err(Exn::from)))
142+
}
143+
144+
/// Functions of the standard library.
///
/// These are the filters of [`base_funs`], followed by
/// the filters that convert from and to data formats.
#[cfg(feature = "parse")]
pub fn funs<D: for<'a> DataT<V<'a> = Val>>() -> impl Iterator<Item = Filter<Native<D>>> {
    let parsers = parse_funs::<D>().into_vec().into_iter().map(run);
    base_funs::<D>().chain(parsers)
}
149+
150+
/// Minimal set of filters.
151+
pub fn base_funs<D: for<'a> DataT<V<'a> = Val>>() -> impl Iterator<Item = Filter<Native<D>>> {
152+
base().into_vec().into_iter().map(run)
153+
}
154+
155+
fn base<D: for<'a> DataT<V<'a> = Val>>() -> Box<[Filter<RunPtr<D>>]> {
156+
Box::new([
157+
("tojson", v(0), |cv| {
158+
let mut buf = Vec::new();
159+
json::write(&mut buf, &cv.1).unwrap();
160+
box_once(Ok(Val::from_utf8_bytes(buf)))
161+
}),
162+
("tobytes", v(0), |cv| {
163+
let pass = |b| Val::Str(b, Tag::Bytes);
164+
let fail = |v| Error::str(format_args!("cannot convert {v} to bytes"));
165+
bome(cv.1.to_bytes().map(pass).map_err(fail))
166+
}),
167+
("torawstring", v(0), |cv| {
168+
box_once(Ok(match cv.1 {
169+
Val::Str(s, _) => Val::Str(s, Tag::Raw),
170+
v => Val::Str(v.to_string().into(), Tag::Raw),
171+
}))
172+
}),
173+
("length", v(0), |cv| bome(cv.1.length())),
174+
("contains", v(1), |cv| {
175+
unary(cv, |x, y| Ok(Val::from(x.contains(&y))))
176+
}),
177+
("has", v(1), |cv| unary(cv, |v, k| v.has(&k).map(Val::from))),
178+
("indices", v(1), |cv| {
179+
let to_int = |i: usize| Val::from(i as isize);
180+
unary(cv, move |x, v| {
181+
x.indices(&v).map(|idxs| idxs.map(to_int).collect())
182+
})
183+
}),
184+
("bsearch", v(1), |cv| {
185+
let to_idx = |r: Result<_, _>| r.map_or_else(|i| -1 - i as isize, |i| i as isize);
186+
unary(cv, move |a, x| {
187+
a.as_arr().map(|a| Val::from(to_idx(a.binary_search(&x))))
188+
})
189+
}),
190+
])
191+
}
192+
193+
fn parse_fail(i: &impl fmt::Display, fmt: &str, e: impl fmt::Display) -> Error {
194+
Error::str(format_args!("cannot parse {i} as {fmt}: {e}"))
195+
}
196+
197+
fn serialise_fail(i: &impl fmt::Display, fmt: &str, e: impl fmt::Display) -> Error {
198+
Error::str(format_args!("cannot serialise {i} as {fmt}: {e}"))
199+
}
200+
201+
self_cell::self_cell!(
202+
struct BytesValRs {
203+
owner: Bytes,
204+
205+
#[not_covariant]
206+
dependent: ValRs,
207+
}
208+
);
209+
210+
impl Iterator for BytesValRs {
211+
type Item = ValR;
212+
fn next(&mut self) -> Option<Self::Item> {
213+
self.with_dependent_mut(|_owner, iter| iter.next())
214+
}
215+
}
216+
217+
type ValRs<'a> = BoxIter<'a, ValR>;
218+
219+
fn parse_byte_str(b: Bytes, parse: impl FnOnce(&str) -> ValRs) -> ValRs<'static> {
220+
Box::new(BytesValRs::new(b, |b| {
221+
then(core::str::from_utf8(b).map_err(Error::str), parse)
222+
}))
223+
}
224+
225+
fn parse_bytes(b: Bytes, parse: impl FnOnce(&[u8]) -> ValRs) -> ValRs<'static> {
226+
Box::new(BytesValRs::new(b, |b| parse(b)))
227+
}
228+
229+
/// Definitions of the filters that convert from and to data formats.
///
/// The filters `fromjson`, `fromcbor`, `fromyaml`, and `fromxml` yield
/// every value that they parse from their input, whereas
/// `fromtoml` yields a single value.
/// The filters `tocbor`, `toyaml`, `totoml`, and `toxml` serialise their input.
///
/// The implementations must not capture anything, because they are stored as
/// function pointers.
#[cfg(feature = "parse")]
fn parse_funs<D: for<'a> DataT<V<'a> = Val>>() -> Box<[Filter<RunPtr<D>>]> {
    Box::new([
        ("fromjson", v(0), |cv| {
            let input = cv.1.try_as_utf8_bytes_owned();
            bmme(then(input, |bytes| {
                // the input value is moved into the error handler,
                // because the resulting iterator must not borrow from `cv`
                let fail = move |r: Result<_, _>| r.map_err(|e| parse_fail(&cv.1, "JSON", e));
                parse_bytes(bytes, |b| Box::new(json::parse_many(b).map(fail)))
            }))
        }),
        ("fromcbor", v(0), |cv| {
            // CBOR is binary, so any string is accepted here, not only UTF-8 strings
            let input = cv.1.try_as_bytes_owned();
            bmme(then(input, |bytes| {
                let fail = move |r: Result<_, _>| r.map_err(|e| parse_fail(&cv.1, "CBOR", e));
                parse_bytes(bytes, |b| Box::new(cbor::parse_many(b).map(fail)))
            }))
        }),
        ("fromyaml", v(0), |cv| {
            let input = cv.1.try_as_utf8_bytes_owned();
            bmme(then(input, |bytes| {
                let fail = move |r: Result<_, _>| r.map_err(|e| parse_fail(&cv.1, "YAML", e));
                parse_byte_str(bytes, |s| Box::new(yaml::parse_many(s).map(fail)))
            }))
        }),
        ("fromxml", v(0), |cv| {
            let input = cv.1.try_as_utf8_bytes_owned();
            bmme(then(input, |bytes| {
                let fail = move |r: Result<_, _>| r.map_err(|e| parse_fail(&cv.1, "XML", e));
                parse_byte_str(bytes, |s| Box::new(xml::parse_many(s).map(fail)))
            }))
        }),
        ("fromtoml", v(0), |cv| {
            let parsed = cv.1.try_as_utf8_bytes().and_then(|bytes| {
                core::str::from_utf8(bytes)
                    .map_err(Error::str)
                    .and_then(|text| toml::parse(text).map_err(|e| parse_fail(&cv.1, "TOML", e)))
            });
            bome(parsed)
        }),
        ("tocbor", v(0), |cv| {
            let mut out = Vec::new();
            cbor::write(&mut out, &cv.1).unwrap();
            box_once(Ok(Val::byte_str(out)))
        }),
        ("toyaml", v(0), |cv| {
            let mut out = Vec::new();
            yaml::write(&mut out, &cv.1).unwrap();
            box_once(Ok(Val::from_utf8_bytes(out)))
        }),
        ("totoml", v(0), |cv| {
            bome(match toml::serialise(&cv.1) {
                Ok(doc) => Ok(Val::utf8_str(doc.to_string())),
                Err(e) => Err(serialise_fail(&cv.1, "TOML", e)),
            })
        }),
        ("toxml", v(0), |cv| {
            bome(match xml::serialise(&cv.1) {
                Ok(doc) => Ok(Val::utf8_str(doc.to_string())),
                Err(e) => Err(serialise_fail(&cv.1, "XML", e)),
            })
        }),
    ])
}

0 commit comments

Comments
 (0)