Skip to content

Commit 36a429f

Browse files
Add information_schema virtual tables for catalog metadata queries
1 parent 1cd5657 commit 36a429f

8 files changed

Lines changed: 1059 additions & 21 deletions

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ A DataFusion extension for querying [DuckLake](https://ducklake.select). DuckLak
1616
- Parquet footer size hints for optimized I/O
1717
- Filter pushdown to Parquet for row group pruning and page-level filtering
1818
- Dynamic metadata lookup (no upfront catalog caching)
19+
- SQL-queryable `information_schema` for catalog metadata (snapshots, schemas, tables, columns, files)
1920

2021
## Known Limitations
2122

src/catalog.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::any::Any;
44
use std::sync::Arc;
55

66
use crate::Result;
7+
use crate::information_schema::InformationSchemaProvider;
78
use crate::metadata_provider::MetadataProvider;
89
use crate::path_resolver::parse_object_store_url;
910
use crate::schema::DuckLakeSchema;
@@ -53,25 +54,38 @@ impl CatalogProvider for DuckLakeCatalog {
5354
fn schema_names(&self) -> Vec<String> {
5455
let snapshot_id = match self.get_current_snapshot_id() {
5556
Ok(id) => id,
56-
Err(_) => return Vec::new(),
57+
Err(_) => return vec!["information_schema".to_string()],
5758
};
5859

59-
// Query database with snapshot_id
60-
self.provider
60+
// Always list the virtual information_schema first, ahead of the catalog's data schemas
61+
let mut names = vec!["information_schema".to_string()];
62+
63+
// Add data schemas from catalog
64+
let data_schemas = self
65+
.provider
6166
.list_schemas(snapshot_id)
6267
.unwrap_or_default()
6368
.into_iter()
64-
.map(|s| s.schema_name)
65-
.collect()
69+
.map(|s| s.schema_name);
70+
71+
names.extend(data_schemas);
72+
names
6673
}
6774

6875
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
76+
// information_schema is virtual: serve it from the metadata provider before any snapshot lookup
77+
if name == "information_schema" {
78+
return Some(Arc::new(InformationSchemaProvider::new(Arc::clone(
79+
&self.provider,
80+
))));
81+
}
82+
6983
let snapshot_id = match self.get_current_snapshot_id() {
7084
Ok(id) => id,
7185
Err(_) => return None,
7286
};
7387

74-
// Query database with snapshot_id
88+
// Query database with snapshot_id for data schemas
7589
match self.provider.get_schema_by_name(name, snapshot_id) {
7690
Ok(Some(meta)) => {
7791
// Resolve schema path hierarchically

0 commit comments

Comments
 (0)