Skip to content

Commit 36ff88a

Browse files
feat: add SQLite metadata provider support (#39)
1 parent 9e4e682 commit 36ff88a

9 files changed

Lines changed: 1583 additions & 8 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ skip-tests-with-docker = []
5252
metadata-duckdb = ["dep:duckdb"]
5353
metadata-postgres = ["dep:sqlx", "sqlx/postgres", "sqlx/chrono"]
5454
metadata-mysql = ["dep:sqlx", "sqlx/mysql", "sqlx/chrono"]
55-
# Future: metadata-sqlite = ["sqlx", "sqlx/sqlite", "sqlx/chrono"]
55+
metadata-sqlite = ["dep:sqlx", "sqlx/sqlite", "sqlx/chrono"]
5656

5757
# Encryption support for Parquet files
5858
encryption = ["parquet/encryption", "datafusion/parquet_encryption", "dep:base64", "dep:hex"]

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ The goal of this project is to make DuckLake a first-class, Arrow-native lakehou
1111
## Currently Supported
1212

1313
- Read-only queries against DuckLake catalogs
14-
- DuckDB, PostgreSQL, and MySQL catalog backends
14+
- DuckDB, PostgreSQL, MySQL, and SQLite catalog backends
1515
- Local filesystem and S3-compatible object stores (MinIO, S3)
1616
- Snapshot-based consistency
1717
- Basic and decimal types
@@ -43,7 +43,6 @@ This project is under active development. The roadmap below reflects major areas
4343
### Metadata & Catalog Improvements
4444

4545
- Metadata caching to reduce repeated catalog lookups
46-
- SQLite metadata provider
4746
- Clear abstraction boundaries between catalog, metadata provider, and execution
4847

4948
### Query Planning & Performance
@@ -86,6 +85,7 @@ This project is under active development. The roadmap below reflects major areas
8685
| `metadata-duckdb` | DuckDB catalog backend ||
8786
| `metadata-postgres` | PostgreSQL catalog backend | |
8887
| `metadata-mysql` | MySQL catalog backend | |
88+
| `metadata-sqlite` | SQLite catalog backend | |
8989
| `encryption` | Parquet Modular Encryption (PME) support | |
9090

9191
```bash
@@ -98,8 +98,11 @@ cargo build --no-default-features --features metadata-postgres
9898
# MySQL only
9999
cargo build --no-default-features --features metadata-mysql
100100

101+
# SQLite only
102+
cargo build --no-default-features --features metadata-sqlite
103+
101104
# All backends
102-
cargo build --features metadata-postgres,metadata-mysql
105+
cargo build --features metadata-postgres,metadata-mysql,metadata-sqlite
103106
```
104107

105108
### Example
@@ -115,6 +118,10 @@ cargo run --example basic_query --features metadata-postgres -- \
115118
# MySQL catalog
116119
cargo run --example basic_query --features metadata-mysql -- \
117120
"mysql://user:password@localhost:3306/database" "SELECT * FROM main.users"
121+
122+
# SQLite catalog
123+
cargo run --example basic_query --features metadata-sqlite -- \
124+
"sqlite:///path/to/catalog.db" "SELECT * FROM main.users"
118125
```
119126

120127
### Integration

examples/basic_query.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Basic DuckLake query example with snapshot isolation
22
//!
33
//! This example demonstrates how to:
4-
//! 1. Create a DuckLake catalog from DuckDB, PostgreSQL, or MySQL
4+
//! 1. Create a DuckLake catalog from DuckDB, PostgreSQL, MySQL, or SQLite
55
//! 2. Bind the catalog to a specific snapshot for query consistency
66
//! 3. Register it with DataFusion
77
//! 4. Execute a simple SELECT query
@@ -36,6 +36,13 @@
3636
//! "mysql://user:password@localhost:3306/database" \
3737
//! "SELECT * FROM main.users"
3838
//! ```
39+
//!
40+
//! With SQLite catalog (requires --features metadata-sqlite):
41+
//! ```bash
42+
//! cargo run --example basic_query --features metadata-sqlite -- \
43+
//! "sqlite:///path/to/catalog.db" \
44+
//! "SELECT * FROM main.users"
45+
//! ```
3946
4047
use datafusion::execution::runtime_env::RuntimeEnv;
4148
use datafusion::prelude::*;
@@ -45,6 +52,8 @@ use datafusion_ducklake::DuckdbMetadataProvider;
4552
use datafusion_ducklake::MySqlMetadataProvider;
4653
#[cfg(feature = "metadata-postgres")]
4754
use datafusion_ducklake::PostgresMetadataProvider;
55+
#[cfg(feature = "metadata-sqlite")]
56+
use datafusion_ducklake::SqliteMetadataProvider;
4857
use datafusion_ducklake::{DuckLakeCatalog, MetadataProvider, register_ducklake_functions};
4958
use object_store::ObjectStore;
5059
use object_store::aws::AmazonS3Builder;
@@ -65,6 +74,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
6574
eprintln!(
6675
" MySQL: cargo run --example basic_query --features metadata-mysql \"mysql://...\" \"SQL\""
6776
);
77+
eprintln!(
78+
" SQLite: cargo run --example basic_query --features metadata-sqlite \"sqlite://...\" \"SQL\""
79+
);
6880
exit(1);
6981
}
7082
let catalog_source = &args[1];
@@ -73,6 +85,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
7385
// Detect provider type based on input
7486
let is_postgres = catalog_source.starts_with("postgresql://");
7587
let is_mysql = catalog_source.starts_with("mysql://");
88+
let is_sqlite = catalog_source.starts_with("sqlite:");
7689

7790
if is_postgres {
7891
#[cfg(not(feature = "metadata-postgres"))]
@@ -106,6 +119,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
106119
println!("Current snapshot ID: {}", snapshot_id);
107120
run_query(provider, snapshot_id, sql).await?;
108121
}
122+
} else if is_sqlite {
123+
#[cfg(not(feature = "metadata-sqlite"))]
124+
{
125+
eprintln!("Error: SQLite support requires the 'metadata-sqlite' feature");
126+
eprintln!("Run with: cargo run --example basic_query --features metadata-sqlite");
127+
exit(1);
128+
}
129+
130+
#[cfg(feature = "metadata-sqlite")]
131+
{
132+
println!("Connecting to SQLite catalog: {}", catalog_source);
133+
let provider = Arc::new(SqliteMetadataProvider::new(catalog_source).await?);
134+
let snapshot_id = provider.get_current_snapshot()?;
135+
println!("Current snapshot ID: {}", snapshot_id);
136+
run_query(provider, snapshot_id, sql).await?;
137+
}
109138
} else {
110139
#[cfg(feature = "metadata-duckdb")]
111140
{

src/error.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ pub enum DuckLakeError {
1818
#[error("DuckDB error: {0}")]
1919
DuckDb(#[from] duckdb::Error),
2020

21-
/// sqlx database error (for PostgreSQL/MySQL metadata providers)
22-
#[cfg(any(feature = "metadata-postgres", feature = "metadata-mysql"))]
21+
/// sqlx database error (for PostgreSQL/MySQL/SQLite metadata providers)
22+
#[cfg(any(
23+
feature = "metadata-postgres",
24+
feature = "metadata-mysql",
25+
feature = "metadata-sqlite"
26+
))]
2327
#[error("Database error: {0}")]
2428
Sqlx(#[from] sqlx::Error),
2529

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ pub mod metadata_provider_duckdb;
5656
pub mod metadata_provider_mysql;
5757
#[cfg(feature = "metadata-postgres")]
5858
pub mod metadata_provider_postgres;
59+
#[cfg(feature = "metadata-sqlite")]
60+
pub mod metadata_provider_sqlite;
5961

6062
// Result type for DuckLake operations
6163
pub type Result<T> = std::result::Result<T, DuckLakeError>;
@@ -75,3 +77,5 @@ pub use metadata_provider_duckdb::DuckdbMetadataProvider;
7577
pub use metadata_provider_mysql::MySqlMetadataProvider;
7678
#[cfg(feature = "metadata-postgres")]
7779
pub use metadata_provider_postgres::PostgresMetadataProvider;
80+
#[cfg(feature = "metadata-sqlite")]
81+
pub use metadata_provider_sqlite::SqliteMetadataProvider;

src/metadata_provider.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ pub trait MetadataProvider: Send + Sync + std::fmt::Debug {
405405
) -> Result<Vec<DeleteFileChange>>;
406406
}
407407

408-
#[cfg(any(feature = "metadata-postgres", feature = "metadata-mysql"))]
408+
#[cfg(any(feature = "metadata-postgres", feature = "metadata-mysql", feature = "metadata-sqlite"))]
409409
/// Helper function to bridge async sqlx operations to sync MetadataProvider trait
410410
pub(crate) fn block_on<F, T>(f: F) -> T
411411
where

0 commit comments

Comments
 (0)