From 448f0a8937348c1aaff3980ed1fee7397f7ee0d3 Mon Sep 17 00:00:00 2001
From: Luke Kim <80174+lukekim@users.noreply.github.com>
Date: Mon, 2 Mar 2026 18:53:02 -0800
Subject: [PATCH 1/3] docs: add DuckLake catalog connector recipe

---
 catalogs/ducklake/README.md     | 249 ++++++++++++++++++++++++++++++++
 catalogs/ducklake/spicepod.yaml |   7 +
 2 files changed, 256 insertions(+)
 create mode 100644 catalogs/ducklake/README.md
 create mode 100644 catalogs/ducklake/spicepod.yaml

diff --git a/catalogs/ducklake/README.md b/catalogs/ducklake/README.md
new file mode 100644
index 00000000..b0b378d1
--- /dev/null
+++ b/catalogs/ducklake/README.md
@@ -0,0 +1,249 @@
+# DuckLake Catalog Connector
+
+The DuckLake Catalog Connector enables Spice to automatically discover and query all schemas and tables in a [DuckLake](https://ducklake.select/) catalog — an open lakehouse format that stores metadata in a SQLite-compatible database and data in Parquet files.
+
+## Prerequisites
+
+- [DuckDB CLI](https://duckdb.org/docs/installation/) installed (to create a DuckLake catalog).
+- Spice is installed (see the [Getting Started](https://docs.spiceai.org/getting-started) documentation).
+
+## Step 1. Create a new directory and initialize a Spicepod
+
+```bash
+mkdir ducklake-catalog-recipe
+cd ducklake-catalog-recipe
+spice init
+```
+
+## Step 2. Create a DuckLake catalog with sample data
+
+Open DuckDB and create a DuckLake catalog with TPC-H sample data:
+
+```bash
+duckdb
+```
+
+Install and load the DuckLake and TPC-H extensions, then create a catalog and populate it:
+
+```sql
+INSTALL ducklake;
+LOAD ducklake;
+INSTALL tpch;
+LOAD tpch;
+
+-- Create a DuckLake catalog with local metadata storage
+ATTACH 'ducklake:metadata.ducklake' AS my_lakehouse;
+
+-- Generate TPC-H data (scale factor 0.01 for a quick demo)
+CALL dbgen(sf = 0.01, catalog = 'my_lakehouse');
+```
+
+Verify the tables were created:
+
+```sql
+SHOW ALL TABLES;
+```
+
+```text
+┌──────────────┬─────────┬──────────┬────────────────────┬─────────────────────────┬───────────┐
+│   database   │ schema  │   name   │    column_names    │      column_types       │ temporary │
+│   varchar    │ varchar │ varchar  │     varchar[]      │        varchar[]        │  boolean  │
+├──────────────┼─────────┼──────────┼────────────────────┼─────────────────────────┼───────────┤
+│ my_lakehouse │ main    │ customer │ [c_custkey, ...]   │ [INTEGER, VARCHAR, ...] │ false     │
+│ my_lakehouse │ main    │ lineitem │ [l_orderkey, ...]  │ [INTEGER, INTEGER, ...] │ false     │
+│ my_lakehouse │ main    │ nation   │ [n_nationkey, ...] │ [INTEGER, VARCHAR, ...] │ false     │
+│ my_lakehouse │ main    │ orders   │ [o_orderkey, ...]  │ [INTEGER, INTEGER, ...] │ false     │
+│ my_lakehouse │ main    │ part     │ [p_partkey, ...]   │ [INTEGER, VARCHAR, ...] │ false     │
+│ my_lakehouse │ main    │ partsupp │ [ps_partkey, ...]  │ [INTEGER, INTEGER, ...] │ false     │
+│ my_lakehouse │ main    │ region   │ [r_regionkey, ...] │ [INTEGER, VARCHAR, ...] │ false     │
+│ my_lakehouse │ main    │ supplier │ [s_suppkey, ...]   │ [INTEGER, VARCHAR, ...] │ false     │
+└──────────────┴─────────┴──────────┴────────────────────┴─────────────────────────┴───────────┘
+```
+
+Exit DuckDB:
+
+```sql
+.exit
+```
+
+## Step 3. Configure the DuckLake Catalog Connector in your Spicepod
+
+Edit `spicepod.yaml` to add the DuckLake catalog:
+
+```yaml
+version: v1
+kind: Spicepod
+name: ducklake-catalog-recipe
+
+catalogs:
+  - from: ducklake:metadata.ducklake
+    name: my_lakehouse
+```
+
+## Step 4. Start the Spice runtime
+
+```bash
+spice run
+```
+
+Observe that Spice discovers all schemas and tables:
+
+```text
+2026-03-02T10:00:00.000000Z  INFO runtime::init::catalog: Registering catalog 'my_lakehouse' for ducklake
+2026-03-02T10:00:00.500000Z  INFO runtime::init::catalog: Registered catalog 'my_lakehouse' with 1 schema and 8 tables
+```
+
+## Step 5. Query the DuckLake catalog
+
+In a new terminal, start the Spice SQL REPL:
+
+```bash
+spice sql
+```
+
+List all discovered tables:
+
+```sql
+SHOW TABLES;
+```
+
+```text
++---------------+--------------+--------------+------------+
+| table_catalog | table_schema | table_name   | table_type |
++---------------+--------------+--------------+------------+
+| my_lakehouse  | main         | customer     | BASE TABLE |
+| my_lakehouse  | main         | lineitem     | BASE TABLE |
+| my_lakehouse  | main         | nation       | BASE TABLE |
+| my_lakehouse  | main         | orders       | BASE TABLE |
+| my_lakehouse  | main         | part         | BASE TABLE |
+| my_lakehouse  | main         | partsupp     | BASE TABLE |
+| my_lakehouse  | main         | region       | BASE TABLE |
+| my_lakehouse  | main         | supplier     | BASE TABLE |
+| spice         | runtime      | task_history | BASE TABLE |
+| spice         | runtime      | metrics      | BASE TABLE |
++---------------+--------------+--------------+------------+
+```
+
+Query the customer table:
+
+```sql
+SELECT c_custkey, c_name, c_mktsegment, c_acctbal
+FROM my_lakehouse.main.customer
+LIMIT 5;
+```
+
+```text
++-----------+--------------------+--------------+-----------+
+| c_custkey | c_name             | c_mktsegment | c_acctbal |
++-----------+--------------------+--------------+-----------+
+| 1         | Customer#000000001 | BUILDING     | 711.56    |
+| 2         | Customer#000000002 | AUTOMOBILE   | 121.65    |
+| 3         | Customer#000000003 | AUTOMOBILE   | 7498.12   |
+| 4         | Customer#000000004 | MACHINERY    | 2866.83   |
+| 5         | Customer#000000005 | HOUSEHOLD    | 794.47    |
++-----------+--------------------+--------------+-----------+
+```
+
+Run a cross-table query:
+
+```sql
+SELECT n.n_name AS nation, COUNT(*) AS num_customers, ROUND(AVG(c.c_acctbal), 2) AS avg_balance
+FROM my_lakehouse.main.customer c
+JOIN my_lakehouse.main.nation n ON c.c_nationkey = n.n_nationkey
+GROUP BY n.n_name
+ORDER BY num_customers DESC
+LIMIT 5;
+```
+
+## Step 6. Enable read-write access (optional)
+
+To enable write operations, update the catalog configuration with `access: read_write`:
+
+```yaml
+version: v1
+kind: Spicepod
+name: ducklake-catalog-recipe
+
+catalogs:
+  - from: ducklake:metadata.ducklake
+    name: my_lakehouse
+    access: read_write
+```
+
+Restart Spice, then start the SQL REPL in a new terminal and insert data:
+
+```bash
+spice run
+```
+
+```bash
+spice sql
+```
+
+```sql
+INSERT INTO my_lakehouse.main.region (r_regionkey, r_name, r_comment)
+VALUES (5, 'ANTARCTICA', 'A cold and remote region');
+```
+
+```text
++-------+
+| count |
++-------+
+| 1     |
++-------+
+```
+
+Verify the insert:
+
+```sql
+SELECT * FROM my_lakehouse.main.region ORDER BY r_regionkey;
+```
+
+## Using the DuckLake Data Connector
+
+Instead of the catalog connector (which auto-discovers all tables), you can connect to specific tables using the DuckLake data connector:
+
+```yaml
+version: v1
+kind: Spicepod
+name: ducklake-data-connector-recipe
+
+datasets:
+  - from: ducklake:customer
+    name: customer
+    params:
+      connection_string: metadata.ducklake
+  - from: ducklake:orders
+    name: orders
+    params:
+      connection_string: metadata.ducklake
+```
+
+This is useful when you only need specific tables or want to configure each dataset independently (e.g., with different acceleration settings).
+
+## Using with Cloud Storage (S3)
+
+DuckLake supports storing metadata and data on cloud storage. To use S3:
+
+1. Ensure AWS credentials are available via environment variables, `~/.aws/credentials`, or an IAM instance profile.
+
+2. Create a DuckLake catalog on S3 (via DuckDB CLI):
+
+```sql
+ATTACH 'ducklake:s3://my-bucket/lakehouse/metadata.ducklake' AS cloud_lakehouse;
+```
+
+3. Configure the Spice catalog:
+
+```yaml
+catalogs:
+  - from: ducklake:s3://my-bucket/lakehouse/metadata.ducklake
+    name: cloud_lakehouse
+```
+
+## Learn more
+
+- [DuckLake website](https://ducklake.select/)
+- [DuckLake Catalog Connector documentation](https://spiceai.org/docs/components/catalogs/ducklake)
+- [DuckLake Data Connector documentation](https://spiceai.org/docs/components/data-connectors/ducklake)
+- [`spice sql` CLI reference](https://docs.spiceai.org/cli/reference/sql)
diff --git a/catalogs/ducklake/spicepod.yaml b/catalogs/ducklake/spicepod.yaml
new file mode 100644
index 00000000..04a885f2
--- /dev/null
+++ b/catalogs/ducklake/spicepod.yaml
@@ -0,0 +1,7 @@
+version: v1
+kind: Spicepod
+name: ducklake-catalog-recipe
+
+catalogs:
+  - from: ducklake:metadata.ducklake
+    name: my_lakehouse

From f032e47bf24187508f14f877aa91b0fb6f471a76 Mon Sep 17 00:00:00 2001
From: Luke Kim <80174+lukekim@users.noreply.github.com>
Date: Mon, 2 Mar 2026 19:16:06 -0800
Subject: [PATCH 2/3] docs: update note about DuckLake connector availability
 in Spice v2.0 or later

---
 catalogs/ducklake/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/catalogs/ducklake/README.md b/catalogs/ducklake/README.md
index b0b378d1..cd46c2dd 100644
--- a/catalogs/ducklake/README.md
+++ b/catalogs/ducklake/README.md
@@ -1,5 +1,7 @@
 # DuckLake Catalog Connector
 
+> **Note:** The DuckLake connector is available in Spice v2.0 or later.
+
 The DuckLake Catalog Connector enables Spice to automatically discover and query all schemas and tables in a [DuckLake](https://ducklake.select/) catalog — an open lakehouse format that stores metadata in a SQLite-compatible database and data in Parquet files.
 
 ## Prerequisites

From 8efa1d14e8a75d569adfa799eef7d12e5b72575d Mon Sep 17 00:00:00 2001
From: Luke Kim <80174+lukekim@users.noreply.github.com>
Date: Fri, 10 Apr 2026 10:08:18 -0700
Subject: [PATCH 3/3] docs: fix DuckLake recipe dbgen compatibility and add
 version requirements

dbgen does not support generating data directly into DuckLake catalogs.
Generate TPC-H data in-memory first, then copy tables into DuckLake.
Add DuckDB v1.3.0+ and Spice v2.0+ version requirements to prerequisites.
---
 catalogs/ducklake/README.md | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/catalogs/ducklake/README.md b/catalogs/ducklake/README.md
index cd46c2dd..efe48aae 100644
--- a/catalogs/ducklake/README.md
+++ b/catalogs/ducklake/README.md
@@ -6,8 +6,8 @@ The DuckLake Catalog Connector enables Spice to automatically discover and query
 
 ## Prerequisites
 
-- [DuckDB CLI](https://duckdb.org/docs/installation/) installed (to create a DuckLake catalog).
-- Spice is installed (see the [Getting Started](https://docs.spiceai.org/getting-started) documentation).
+- [DuckDB CLI](https://duckdb.org/docs/installation/) v1.3.0 or later installed (to create a DuckLake catalog).
+- Spice v2.0 or later is installed (see the [Getting Started](https://docs.spiceai.org/getting-started) documentation).
 
 ## Step 1. Create a new directory and initialize a Spicepod
 
@@ -33,11 +33,21 @@ LOAD ducklake;
 INSTALL tpch;
 LOAD tpch;
 
+-- Generate TPC-H data in-memory (scale factor 0.01 for a quick demo)
+CALL dbgen(sf = 0.01);
+
 -- Create a DuckLake catalog with local metadata storage
 ATTACH 'ducklake:metadata.ducklake' AS my_lakehouse;
 
--- Generate TPC-H data (scale factor 0.01 for a quick demo)
-CALL dbgen(sf = 0.01, catalog = 'my_lakehouse');
+-- Copy tables into DuckLake
+CREATE TABLE my_lakehouse.main.customer AS SELECT * FROM customer;
+CREATE TABLE my_lakehouse.main.lineitem AS SELECT * FROM lineitem;
+CREATE TABLE my_lakehouse.main.nation AS SELECT * FROM nation;
+CREATE TABLE my_lakehouse.main.orders AS SELECT * FROM orders;
+CREATE TABLE my_lakehouse.main.part AS SELECT * FROM part;
+CREATE TABLE my_lakehouse.main.partsupp AS SELECT * FROM partsupp;
+CREATE TABLE my_lakehouse.main.region AS SELECT * FROM region;
+CREATE TABLE my_lakehouse.main.supplier AS SELECT * FROM supplier;
 ```
 
 Verify the tables were created: