From 837be2b88a9510cd62efb2b137f7ad86cbb73581 Mon Sep 17 00:00:00 2001 From: Anoop Narang Date: Wed, 20 May 2026 12:57:23 +0530 Subject: [PATCH 1/3] fix(writer): align ducklake_column / ducklake_data_file with the spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `SqliteMetadataWriter` was producing a `ducklake_column` table that lacked `parent_column`, `initial_default`, `default_value`, `default_value_type`, and `default_value_dialect`. The reader's SQL_GET_TABLE_COLUMNS query (added in PR #89) projects `parent_column`, so any catalog written by `SqliteMetadataWriter` was unreadable through the metadata provider, panicking with "no such column: parent_column". The bug shipped because CI ran `cargo test` without `write-sqlite` features, and both `tests/sql_write_tests.rs` and `tests/sqlite_metadata_provider_test.rs` are gated behind features that CI didn't enable — so the broken-catalog path compiled to a zero-test binary in CI and was invisible. Three changes: 1. `metadata_writer_sqlite.rs::SQL_CREATE_SCHEMA` adds the five missing `ducklake_column` columns, matching the upstream `ducklake_metadata_manager.cpp` schema definition. We don't populate the four `*_default*` columns today (we don't support nested types or column defaults yet), but the columns must exist so the reader query doesn't error. 2. `tests/sqlite_metadata_provider_test.rs::init_schema` is brought in line with the same spec. It also gains `row_id_start`, `record_count`, and `mapping_id` on `ducklake_data_file` — these are projected by SQL_GET_DATA_FILES since the row-lineage work landed in PR #115 and any sqlite-fixture test that exercises the data-file read path would have hit "no such column" without them. 3. `.github/workflows/ci.yml` enables `write-sqlite` on the test step so `sql_write_tests` and `sqlite_metadata_provider_test` actually run. Without this, the next analogous schema drift will ship invisibly again. Verified by running `cargo test --features "skip-tests-with-docker write-sqlite"` locally — 228 unit tests + every integration suite green, including the 7 previously-broken `sql_write_tests` and 18 `sqlite_metadata_provider_test` cases. Out of scope (separately broken with the same root cause, no CI coverage today): the encryption, postgres, and mysql test fixtures have analogous spec gaps. Worth a follow-up once CI for those backends is set up. --- .github/workflows/ci.yml | 9 +++++++-- src/metadata_writer_sqlite.rs | 13 +++++++++++++ tests/sqlite_metadata_provider_test.rs | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6281d15..71c7e06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,10 +84,15 @@ jobs: - name: Run tests run: | + # Enable write-sqlite so tests/sql_write_tests.rs (gated on + # write-sqlite + metadata-sqlite) actually runs in CI. Without + # this, the file compiles to a zero-test binary and any + # writer-schema regression is invisible — see PR #115's + # post-merge debugging for the prior consequence. if [[ "$RUNNER_OS" == "macOS" ]]; then - cargo test --features skip-tests-with-docker + cargo test --features "skip-tests-with-docker write-sqlite" else - cargo test + cargo test --features "write-sqlite" fi - name: Check documentation diff --git a/src/metadata_writer_sqlite.rs b/src/metadata_writer_sqlite.rs index 09698fb..2e567ff 100644 --- a/src/metadata_writer_sqlite.rs +++ b/src/metadata_writer_sqlite.rs @@ -50,6 +50,19 @@ CREATE TABLE IF NOT EXISTS ducklake_column ( column_type VARCHAR NOT NULL, column_order INTEGER NOT NULL, nulls_allowed BOOLEAN DEFAULT 1, + -- Columns below match the DuckLake spec (see + -- ducklake_metadata_manager.cpp upstream). We don't populate them + -- today (our writer doesn't support nested types or column defaults) + -- but their PRESENCE in the schema is required so the reader's + -- SQL_GET_TABLE_COLUMNS query — which projects `parent_column` — + -- doesn't fail on tables we wrote. Leaving the other four in for + -- compatibility with anything DuckDB or future reader code may + -- project from the same row. + initial_default VARCHAR, + default_value VARCHAR, + parent_column INTEGER, + default_value_type VARCHAR, + default_value_dialect VARCHAR, begin_snapshot INTEGER NOT NULL, end_snapshot INTEGER ); diff --git a/tests/sqlite_metadata_provider_test.rs b/tests/sqlite_metadata_provider_test.rs index f2a909b..02a5a11 100644 --- a/tests/sqlite_metadata_provider_test.rs +++ b/tests/sqlite_metadata_provider_test.rs @@ -68,6 +68,10 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { .execute(pool) .await?; + // The five non-`column_id`/`table_id`/etc. columns below mirror the + // upstream DuckLake spec. The reader's SQL_GET_TABLE_COLUMNS query + // projects `parent_column`, so this fixture has to provide it or the + // metadata provider errors with "no such column". sqlx::query( "CREATE TABLE IF NOT EXISTS ducklake_column ( column_id INTEGER PRIMARY KEY, @@ -76,6 +80,11 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { column_type TEXT NOT NULL, column_order INTEGER NOT NULL, nulls_allowed INTEGER, + initial_default TEXT, + default_value TEXT, + parent_column INTEGER, + default_value_type TEXT, + default_value_dialect TEXT, begin_snapshot INTEGER NOT NULL DEFAULT 1, end_snapshot INTEGER, FOREIGN KEY (table_id) REFERENCES ducklake_table(table_id) @@ -84,6 +93,9 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { .execute(pool) .await?; + // `row_id_start` and `record_count` are projected by SQL_GET_DATA_FILES + // since the row-lineage work in PR #115. The fixture must surface them + // so the provider can hydrate `DuckLakeTableFile`. sqlx::query( "CREATE TABLE IF NOT EXISTS ducklake_data_file ( data_file_id INTEGER PRIMARY KEY, @@ -93,6 +105,9 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { file_size_bytes INTEGER NOT NULL, footer_size INTEGER, encryption_key TEXT, + record_count INTEGER, + row_id_start INTEGER, + mapping_id INTEGER, begin_snapshot INTEGER NOT NULL DEFAULT 1, end_snapshot INTEGER, FOREIGN KEY (table_id) REFERENCES ducklake_table(table_id) From a840c0f60ed366ee244f5ba2567e49fadd61891f Mon Sep 17 00:00:00 2001 From: Anoop Narang Date: Wed, 20 May 2026 13:09:00 +0530 Subject: [PATCH 2/3] docs(ci): reword the write-sqlite comment so the intent isn't ambiguous The earlier wording read like "enable write-sqlite so the file runs" which could be parsed as either "we previously didn't run them and now we are" (correct) or "we want to skip running them" (wrong). Rephrased to make clear we're adding features so the tests become visible to cargo, not the other way round. --- .github/workflows/ci.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71c7e06..2852924 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,12 +83,13 @@ jobs: run: cargo clippy --all-targets --all-features --no-deps -- -D warnings - name: Run tests + # `--features write-sqlite` is required to make the writer + sqlite- + # provider test suites visible to `cargo test`. Both test files are + # gated with `#![cfg(feature = "write-sqlite")]` / + # `#![cfg(feature = "metadata-sqlite")]`, so without these features + # cargo silently compiles them to zero-test binaries and any + # writer-schema regression slips through CI unnoticed. run: | - # Enable write-sqlite so tests/sql_write_tests.rs (gated on - # write-sqlite + metadata-sqlite) actually runs in CI. Without - # this, the file compiles to a zero-test binary and any - # writer-schema regression is invisible — see PR #115's - # post-merge debugging for the prior consequence. if [[ "$RUNNER_OS" == "macOS" ]]; then cargo test --features "skip-tests-with-docker write-sqlite" else From 86f29476b70ea261ccd404edbaa0938e2b70c2ef Mon Sep 17 00:00:00 2001 From: Anoop Narang Date: Wed, 20 May 2026 13:17:56 +0530 Subject: [PATCH 3/3] docs: trim the new comments to just the non-obvious WHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three comment blocks added earlier in this branch were verbose, duplicated reasoning across files, or carried a PR reference that will rot. Trimmed each to the smallest signal a future reader needs: * CI: 6 lines → 3. Keep the "without this, cargo compiles to a zero-test binary" hazard; drop the cfg-syntax tutorial. * metadata_writer_sqlite.rs: 8 lines → 5. Spell out who reads each group of columns (our reader vs DuckDB) without re-explaining the same thing twice. * sqlite_metadata_provider_test.rs: 7 lines across two blocks → 1. Replace the duplicated "why columns exist" lecture with a single pointer to the writer file. Also drops the "since PR #115" reference per the don't-cite-PR-numbers convention — the linkage is now in the writer file's comment. --- .github/workflows/ci.yml | 9 +++------ src/metadata_writer_sqlite.rs | 13 +++++-------- tests/sqlite_metadata_provider_test.rs | 8 +------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2852924..f50ca20 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,12 +83,9 @@ jobs: run: cargo clippy --all-targets --all-features --no-deps -- -D warnings - name: Run tests - # `--features write-sqlite` is required to make the writer + sqlite- - # provider test suites visible to `cargo test`. Both test files are - # gated with `#![cfg(feature = "write-sqlite")]` / - # `#![cfg(feature = "metadata-sqlite")]`, so without these features - # cargo silently compiles them to zero-test binaries and any - # writer-schema regression slips through CI unnoticed. + # `write-sqlite` pulls in the writer and sqlite-provider tests, + # which are `#![cfg]`-gated; without it cargo compiles them to + # zero-test binaries. run: | if [[ "$RUNNER_OS" == "macOS" ]]; then cargo test --features "skip-tests-with-docker write-sqlite" diff --git a/src/metadata_writer_sqlite.rs b/src/metadata_writer_sqlite.rs index 2e567ff..47883fa 100644 --- a/src/metadata_writer_sqlite.rs +++ b/src/metadata_writer_sqlite.rs @@ -50,14 +50,11 @@ CREATE TABLE IF NOT EXISTS ducklake_column ( column_type VARCHAR NOT NULL, column_order INTEGER NOT NULL, nulls_allowed BOOLEAN DEFAULT 1, - -- Columns below match the DuckLake spec (see - -- ducklake_metadata_manager.cpp upstream). We don't populate them - -- today (our writer doesn't support nested types or column defaults) - -- but their PRESENCE in the schema is required so the reader's - -- SQL_GET_TABLE_COLUMNS query — which projects `parent_column` — - -- doesn't fail on tables we wrote. Leaving the other four in for - -- compatibility with anything DuckDB or future reader code may - -- project from the same row. + -- Mirror the upstream DuckLake spec (ducklake_metadata_manager.cpp): + -- `parent_column` is projected by our reader's SQL_GET_TABLE_COLUMNS; + -- the four `*default*` columns are projected by DuckDB when it reads + -- catalogs we produce. We leave them NULL — no nested-type or + -- column-default writes yet. initial_default VARCHAR, default_value VARCHAR, parent_column INTEGER, diff --git a/tests/sqlite_metadata_provider_test.rs b/tests/sqlite_metadata_provider_test.rs index 02a5a11..0802b1d 100644 --- a/tests/sqlite_metadata_provider_test.rs +++ b/tests/sqlite_metadata_provider_test.rs @@ -68,10 +68,7 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { .execute(pool) .await?; - // The five non-`column_id`/`table_id`/etc. columns below mirror the - // upstream DuckLake spec. The reader's SQL_GET_TABLE_COLUMNS query - // projects `parent_column`, so this fixture has to provide it or the - // metadata provider errors with "no such column". + // Schema must match SQL_CREATE_SCHEMA in metadata_writer_sqlite.rs. sqlx::query( "CREATE TABLE IF NOT EXISTS ducklake_column ( column_id INTEGER PRIMARY KEY, @@ -93,9 +90,6 @@ async fn init_schema(pool: &SqlitePool) -> anyhow::Result<()> { .execute(pool) .await?; - // `row_id_start` and `record_count` are projected by SQL_GET_DATA_FILES - // since the row-lineage work in PR #115. The fixture must surface them - // so the provider can hydrate `DuckLakeTableFile`. sqlx::query( "CREATE TABLE IF NOT EXISTS ducklake_data_file ( data_file_id INTEGER PRIMARY KEY,