2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -18,7 +18,7 @@ env:
EXTENSION_API_VERSION: v1.2.0
ZIG_VERSION: 0.15.2
DUCKDB_VERSION: v1.5.0
SUPPORTED_DUCKDB_VERSIONS: v1.2.0,v1.2.1,v1.2.2,v1.3.0,v1.3.1,v1.3.2,v1.4.0,v1.4.1,v1.4.2,v1.4.3,v1.5.0
SUPPORTED_DUCKDB_VERSIONS: v1.2.0,v1.2.1,v1.2.2,v1.3.0,v1.3.1,v1.3.2,v1.4.0,v1.4.1,v1.4.2,v1.4.3,v1.4.4,v1.5.0

jobs:
call-tests:
3 changes: 3 additions & 0 deletions CONTRIBUTING.md
@@ -28,6 +28,9 @@ issue you would like to work on or if it has already been resolved.

### Development Workflow

> [!IMPORTANT]
> If you're using an AI-assisted coding tool like Claude Code or Codex, make sure the AI follows the instructions in the [AGENTS.md](AGENTS.md) file.

#### Prerequisites

Install GNU Make on your system if it's not already installed.
6 changes: 5 additions & 1 deletion README.md
@@ -137,7 +137,11 @@ Example output:

### Documentation

Check out the project documentation [here](https://cogitatortech.github.io/vizier/).
Check out the project documentation [here](https://cogitatortech.github.io/vizier/) for more details, including examples and an API reference.

### Benchmarks

See the [benches](benches/README.md) directory for instructions on running the local benchmarks for Vizier and its API.
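For example (commands taken from the benches README; run from the repository root):

```shell
make bench                      # run every benchmark
./benches/run.sh tpch_workload  # or run a single benchmark directly
```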

---

9 changes: 8 additions & 1 deletion ROADMAP.md
@@ -93,12 +93,19 @@ This document outlines the features implemented in Vizier and the future goals f
- [x] `vizier.replay_summary` regression detection view
- [x] `vizier.replay_results` per-query replay breakdown table

### Integrations

- [x] dbt import script (`scripts/import_dbt.py`) for capturing workloads from dbt run results

### Development and Testing

- [x] Unit, property-based, integration, and standalone SQL tests
- [x] SQL-based benchmarks against real-world datasets (`benches/` and `make bench`)
- [x] TPC-H correctness validation (`benches/tpch_correctness.sql`)
- [x] Cross-platform build support (Linux, macOS, Windows, and FreeBSD)
- [x] CI pipeline with 9-platform cross-compile
- [x] Schema migration support for upgrades (`ALTER TABLE ADD COLUMN IF NOT EXISTS`)
- [x] Cross-version state load/save compatibility (`INSERT BY NAME`)

### Persistent State

@@ -130,12 +137,12 @@ Replace frequency-based scoring with cost-aware scoring that factors in query pe

### Web Dashboard

- [x] Static HTML report export via `vizier_report(path)`
- [ ] Embedded HTTP server in the extension (or standalone `vizier-dashboard` binary)
- [ ] Workload heatmap: tables and columns colored by query frequency and time spent
- [ ] Recommendation list with score bars, one-click apply, and dry-run preview
- [ ] Before/after plan diff viewer and timeline of applied changes
- [ ] Physical design overview: tables, indexes, sizes, and predicate coverage
- [x] Static HTML report export via `vizier_report(path)`

### LLM-Powered Explanations

8 changes: 5 additions & 3 deletions benches/README.md
@@ -18,16 +18,18 @@ Or directly:
### Running a Single Benchmark

```bash
./benches/run.sh tpch # Runs only tpch_workload.sql
./benches/run.sh capture # Runs only capture_throughput.sql
./benches/run.sh advisor # Runs only advisor_throughput.sql
./benches/run.sh tpch_workload # Runs only tpch_workload.sql
./benches/run.sh tpch_correct # Runs only tpch_correctness.sql
./benches/run.sh capture # Runs only capture_throughput.sql
./benches/run.sh advisor # Runs only advisor_throughput.sql
```

### Benchmark Descriptions

| File | What it measures |
|--------------------------|----------------------------------------------------------------------------------------------------------|
| `tpch_workload.sql` | End-to-end: TPC-H data (500K+ rows), 12 analytical queries, advisors, apply, before and after benchmarks |
| `tpch_correctness.sql` | Correctness: real TPC-H via `dbgen`, all 22 queries, validates recommendations against expected results |
| `advisor_throughput.sql` | Advisor speed: analyze time at 10 and 30 query workload sizes across 3 tables |
| `capture_throughput.sql` | Capture speed: 50 individual captures, 500 bulk captures, flush throughput |

208 changes: 208 additions & 0 deletions benches/tpch_correctness.sql
@@ -0,0 +1,208 @@
-- TPC-H correctness validation for Vizier
-- Generates real TPC-H data via dbgen, feeds all 22 queries into Vizier,
-- and checks whether the recommendations are reasonable.
--
-- Expected recommendations:
-- lineitem: sort by l_shipdate (most filtered column across Q1, Q3, Q4, Q6, Q7, Q12, Q14, Q15)
-- orders: sort by o_orderdate (filtered in Q3, Q4, Q5, Q8, Q10)
-- customer: index on c_mktsegment (equality filter in Q3) or c_nationkey
-- region: index on r_name (equality filter in Q2, Q5, Q8)
-- nation: index on n_regionkey (join predicate in Q2, Q5, Q8)
-- supplier: index on s_nationkey (join predicate)
-- partsupp: considered for join-path sort
--
-- Usage: duckdb -unsigned -c ".read benches/tpch_correctness.sql"

load 'zig-out/lib/vizier.duckdb_extension';
install tpch;
load tpch;

-- ======================================================================
-- 1. Generate TPC-H data at scale factor 0.1 (~60K lineitem rows)
-- ======================================================================
select '>>> Generating TPC-H SF=0.1' as step;
call dbgen(sf=0.1);

select 'lineitem' as tbl, count(*) as rows from lineitem
union all select 'orders', count(*) from orders
union all select 'customer', count(*) from customer
union all select 'supplier', count(*) from supplier
union all select 'nation', count(*) from nation
union all select 'region', count(*) from region
union all select 'part', count(*) from part
union all select 'partsupp', count(*) from partsupp
order by rows desc;

-- ======================================================================
-- 2. Capture all 22 TPC-H queries (each repeated to boost frequency)
-- ======================================================================
select '>>> Capturing all 22 TPC-H queries' as step;

-- Create a persistent table so g_flush_conn can access it for bulk capture
create table vizier_tpch_queries as
select query_nr, query from tpch_queries();

-- Capture each query 3 times to simulate a repeated workload
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');

select * from vizier_flush();

-- ======================================================================
-- 3. Check captured workload
-- ======================================================================
select '>>> Workload summary' as step;
select query_signature as sig, execution_count as runs,
avg_time_ms as avg_ms, estimated_rows,
substr(sample_sql, 1, 80) || '...' as sql_preview
from vizier.workload_queries
order by execution_count desc;

select '>>> Predicate summary' as step;
select table_name, column_name, predicate_kind, frequency
from vizier.workload_predicates
order by frequency desc, table_name, column_name
limit 30;

-- ======================================================================
-- 4. Run advisors
-- ======================================================================
select '>>> Running all advisors' as step;
select * from vizier_analyze();

-- ======================================================================
-- 5. Validate recommendations
-- ======================================================================
select '>>> All recommendations (ranked by score)' as step;
select recommendation_id as id, kind, table_name,
round(score, 3) as score, round(confidence, 2) as conf,
substr(reason, 1, 100) as reason_preview
from vizier.recommendations
order by score desc;

-- Validation checks: verify expected recommendations exist
select '>>> Validation: expected recommendations' as step;

-- Check 1: lineitem should have a sort recommendation (most filtered table)
select 'lineitem sort recommendation' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where kind = 'rewrite_sorted_table' and table_name = 'lineitem';

-- Check 2: orders should have a sort recommendation
select 'orders sort recommendation' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where kind = 'rewrite_sorted_table' and table_name = 'orders';

-- Check 3: there should be index recommendations for equality predicates
select 'index recommendations exist' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where kind = 'create_index';

-- Check 4: the lineitem sort columns should include l_shipdate
select 'lineitem sort includes l_shipdate' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where kind = 'rewrite_sorted_table'
and table_name = 'lineitem'
and columns_json like '%l_shipdate%';

-- Check 5: no recommendations for tables entirely outside the workload
-- (aliases like n1, n2, l1 from TPC-H self-joins are acceptable)
select 'no unrelated table recommendations' as check_name,
case when count(*) = 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where table_name not in ('lineitem', 'orders', 'customer', 'supplier',
'nation', 'region', 'part', 'partsupp',
'n1', 'n2', 'l1', 'l2', 'l3', '')
and kind != 'no_action';

-- Check 6: estimated_rows should be populated (EXPLAIN integration working)
select 'estimated_rows populated' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.workload_queries
where estimated_rows > 0;

-- Check 7: score ordering is sane -- the top recommendation should be for a
-- real TPC-H table (the most-filtered tables should score highest)
select 'top recommendation is for a TPC-H table' as check_name,
case when table_name in ('lineitem', 'orders', 'customer', 'supplier',
'nation', 'region', 'part', 'partsupp')
then 'PASS' else 'FAIL' end as result
from vizier.recommendations
where table_name != ''
order by score desc
limit 1;

-- ======================================================================
-- 6. Explain top recommendations
-- ======================================================================
select '>>> Top 3 recommendations explained' as step;
select * from vizier.explain(1);
select * from vizier.explain(2);
select * from vizier.explain(3);

-- ======================================================================
-- 7. Apply top recommendation and benchmark
-- ======================================================================
select '>>> Applying top recommendation (dry run)' as step;
select * from vizier_apply(1, dry_run => true);

select '>>> Replay workload before applying' as step;
select * from vizier_replay();
select queries_replayed, round(replay_total_ms, 2) as replay_ms,
overall_verdict
from vizier.replay_totals;

-- ======================================================================
-- Summary
-- ======================================================================
select '>>> Validation summary' as step;
select check_name, result from (
select 'lineitem sort recommendation' as check_name,
case when count(*) > 0 then 'PASS' else 'FAIL' end as result
from vizier.recommendation_store
where kind = 'rewrite_sorted_table' and table_name = 'lineitem'
union all
select 'orders sort recommendation',
case when count(*) > 0 then 'PASS' else 'FAIL' end
from vizier.recommendation_store
where kind = 'rewrite_sorted_table' and table_name = 'orders'
union all
select 'index recommendations exist',
case when count(*) > 0 then 'PASS' else 'FAIL' end
from vizier.recommendation_store
where kind = 'create_index'
union all
select 'lineitem sort includes l_shipdate',
case when count(*) > 0 then 'PASS' else 'FAIL' end
from vizier.recommendation_store
where kind = 'rewrite_sorted_table'
and table_name = 'lineitem'
and columns_json like '%l_shipdate%'
union all
select 'no unrelated table recommendations',
case when count(*) = 0 then 'PASS' else 'FAIL' end
from vizier.recommendation_store
where table_name not in ('lineitem', 'orders', 'customer', 'supplier',
'nation', 'region', 'part', 'partsupp',
'n1', 'n2', 'l1', 'l2', 'l3', '')
and kind != 'no_action'
union all
select 'estimated_rows populated',
case when count(*) > 0 then 'PASS' else 'FAIL' end
from vizier.workload_queries
where estimated_rows > 0
union all
select 'top recommendation is for a TPC-H table',
case when table_name in ('lineitem', 'orders', 'customer', 'supplier',
'nation', 'region', 'part', 'partsupp')
then 'PASS' else 'FAIL' end
from (select table_name from vizier.recommendations where table_name != '' order by score desc limit 1)
);

drop table vizier_tpch_queries;
2 changes: 1 addition & 1 deletion build.zig.zon
@@ -1,6 +1,6 @@
.{
.name = .vizier,
.version = "0.1.0-alpha.4",
.version = "0.1.0-alpha.5",
.fingerprint = 0x1d9e65e6e2e75d98, // Changing this has security and trust implications.
.minimum_zig_version = "0.15.2",
.dependencies = .{
19 changes: 19 additions & 0 deletions docs/getting-started.md
@@ -163,3 +163,22 @@ If you have a DuckDB JSON profiling file:
select * from vizier_import_profile('/path/to/profile.json');
select * from vizier_flush();
```

## Import from dbt

If you use [dbt](https://www.getdbt.com/) with DuckDB, you can import your dbt run results into Vizier.
The [import_dbt.py](https://github.com/CogitatorTech/vizier/blob/main/scripts/import_dbt.py) script reads compiled SQL from `manifest.json` and execution timing from `run_results.json`:

```bash
# Import all successful model runs
python scripts/import_dbt.py --db my_database.duckdb

# Only import models that took longer than 0.5 seconds
python scripts/import_dbt.py --db my_database.duckdb --min-time 0.5

# Preview what would be imported without executing
python scripts/import_dbt.py --db my_database.duckdb --dry-run
```

By default, the script reads from the `target/` directory.
Use `--target` to point to a different location.
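The script's filtering step can be sketched as follows. This is an illustrative sketch, not the script's actual internals: the `select_runs` helper and the inline sample data are hypothetical, but the `results` / `status` / `execution_time` / `unique_id` fields follow the standard dbt `run_results.json` artifact shape.

```python
def select_runs(run_results: dict, min_time: float = 0.0) -> list[tuple[str, float]]:
    """Pick successful model runs at or above the time threshold (in seconds)."""
    selected = []
    for result in run_results.get("results", []):
        # Skip errored or skipped models; only successful runs are importable
        if result.get("status") != "success":
            continue
        elapsed = result.get("execution_time", 0.0)
        if elapsed >= min_time:
            selected.append((result["unique_id"], elapsed))
    return selected

# A hand-written fragment standing in for target/run_results.json
sample = {
    "results": [
        {"unique_id": "model.proj.fast", "status": "success", "execution_time": 0.1},
        {"unique_id": "model.proj.slow", "status": "success", "execution_time": 2.3},
        {"unique_id": "model.proj.bad", "status": "error", "execution_time": 5.0},
    ]
}
print(select_runs(sample, min_time=0.5))  # only the slow, successful model survives
```

With `--min-time 0.5`, only `model.proj.slow` would be imported here: the fast run falls below the threshold and the errored run is excluded regardless of timing.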