Skip to content

Commit d91d2f0

Browse files
[codex] docs(sqlite): add JSON metadata ergonomics (0xPlaygrounds#1798)
* docs(sqlite): add JSON metadata ergonomics
* docs(sqlite): document JSON metadata dependencies
1 parent 02e9cc9 commit d91d2f0

2 files changed

Lines changed: 97 additions & 2 deletions

File tree

crates/rig-sqlite/README.md

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,13 @@ Add the companion crate to your `Cargo.toml`, along with the rig-core crate:
2626
[dependencies]
2727
rig-sqlite = "0.2.6"
2828
rig-core = "0.37.0"
29+
serde = { version = "1", features = ["derive"] }
30+
serde_json = "1"
2931
```
3032

31-
You can also run `cargo add rig-sqlite rig-core` to add the most recent versions of the dependencies to your project.
33+
You can also run `cargo add rig-sqlite rig-core serde_json` and
34+
`cargo add serde --features derive` to add the most recent versions of the
35+
dependencies to your project.
3236

3337
See the [`/examples`](./examples) folder for usage examples.
3438

@@ -45,6 +49,52 @@ unsafe {
4549
}
4650
```
4751

52+
## Storing JSON Metadata
53+
54+
Declare JSON metadata columns with `Column::new("metadata", "JSON")` and store
55+
the value as `serde_json::Value`. Rig writes the value as JSON text and parses
56+
it back as structured JSON when documents are returned from vector searches.
57+
58+
```rust
59+
use rig_core::Embed;
60+
use rig_sqlite::{Column, ColumnValue, SqliteVectorStoreTable};
61+
use serde::{Deserialize, Serialize};
62+
63+
#[derive(Clone, Debug, Deserialize, Embed, Serialize)]
64+
struct Document {
65+
id: String,
66+
#[embed]
67+
text: String,
68+
metadata: serde_json::Value,
69+
}
70+
71+
impl SqliteVectorStoreTable for Document {
72+
fn name() -> &'static str {
73+
"documents"
74+
}
75+
76+
fn schema() -> Vec<Column> {
77+
vec![
78+
Column::new("id", "TEXT PRIMARY KEY"),
79+
Column::new("text", "TEXT"),
80+
Column::new("metadata", "JSON"),
81+
]
82+
}
83+
84+
fn id(&self) -> String {
85+
self.id.clone()
86+
}
87+
88+
fn column_values(&self) -> Vec<(&'static str, Box<dyn ColumnValue>)> {
89+
vec![
90+
("id", Box::new(self.id.clone())),
91+
("text", Box::new(self.text.clone())),
92+
("metadata", Box::new(self.metadata.clone())),
93+
]
94+
}
95+
}
96+
```
97+
4898
## Filtering JSON Metadata
4999

50100
SQLite filters can target document-table columns that store JSON text. Use

crates/rig-sqlite/src/lib.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub enum SqliteError {
3030
}
3131

3232
/// Value that can be stored in a SQLite vector store document column.
33+
///
34+
/// Use [`serde_json::Value`] for columns declared as `JSON`.
3335
pub trait ColumnValue: Send + Sync {
3436
/// Converts this value to a typed SQLite value.
3537
fn to_sql_value(&self) -> Value;
@@ -2274,6 +2276,16 @@ impl ColumnValue for bool {
22742276
}
22752277
}
22762278

2279+
impl ColumnValue for serde_json::Value {
2280+
fn to_sql_value(&self) -> Value {
2281+
Value::Text(self.to_string())
2282+
}
2283+
2284+
fn column_type(&self) -> &'static str {
2285+
"JSON"
2286+
}
2287+
}
2288+
22772289
#[cfg(test)]
22782290
mod tests {
22792291
use super::*;
@@ -2371,6 +2383,35 @@ mod tests {
23712383
Ok(())
23722384
}
23732385

2386+
#[test]
2387+
fn serde_json_value_column_value_round_trips_json_column() -> anyhow::Result<()> {
2388+
let value = serde_json::json!({
2389+
"knowledge_doc_id": 361,
2390+
"knowledge_id": 1,
2391+
"user_id": 1
2392+
});
2393+
anyhow::ensure!(
2394+
value.column_type() == "JSON",
2395+
"serde_json::Value should declare JSON column type"
2396+
);
2397+
2398+
let text = match value.to_sql_value() {
2399+
Value::Text(text) => text,
2400+
value => {
2401+
anyhow::bail!("serde_json::Value should serialize as JSON text, got {value:?}")
2402+
}
2403+
};
2404+
2405+
let column = Column::new("metadata", "JSON");
2406+
let round_trip = sqlite_column_value_to_json(0, &column, ValueRef::Text(text.as_bytes()))?;
2407+
anyhow::ensure!(
2408+
round_trip == value,
2409+
"serde_json::Value should round-trip through a JSON column, got {round_trip:?}"
2410+
);
2411+
2412+
Ok(())
2413+
}
2414+
23742415
fn filter_error<T: std::fmt::Debug>(
23752416
result: Result<T, FilterError>,
23762417
context: &str,
@@ -4890,7 +4931,11 @@ mod tests {
48904931
("id", Box::new(self.id.clone())),
48914932
(
48924933
"metadata",
4893-
Box::new(serde_json::to_string(&self.metadata).unwrap_or_default()),
4934+
Box::new(serde_json::json!({
4935+
"user_id": self.metadata.user_id,
4936+
"knowledge_id": self.metadata.knowledge_id,
4937+
"knowledge_doc_id": self.metadata.knowledge_doc_id,
4938+
})),
48944939
),
48954940
("title", Box::new(self.title.clone())),
48964941
]

0 commit comments

Comments
 (0)