Skip to content

Commit 5f54287

Browse files
committed
test(sql): cover cluster-key join cost factor
1 parent 146b11c commit 5f54287

1 file changed

Lines changed: 215 additions & 0 deletions

File tree

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
# Session and database setup: everything below runs inside a freshly
# recreated join_reorder_cluster_key_cost database.
# NOTE(review): presumably ddl_column_type_nullable = 0 makes the columns
# created later non-nullable by default — confirm against the settings reference.
statement ok
2+
set ddl_column_type_nullable = 0;
3+
4+
# Drop any leftover database from a previous run so the test is repeatable.
statement ok
5+
drop database if exists join_reorder_cluster_key_cost;
6+
7+
statement ok
8+
create database join_reorder_cluster_key_cost;
9+
10+
statement ok
11+
use join_reorder_cluster_key_cost;
12+
13+
# Fixture table with a cluster key: three int columns, clustered by filter_key.
statement ok
14+
create table clustered_side(
15+
join_key int,
16+
filter_key int,
17+
payload int
18+
) cluster by(filter_key);
19+
20+
# Load 1000 rows from numbers(1000): join_key = number % 100,
# filter_key = number % 10, payload = number.
statement ok
21+
insert into clustered_side
22+
select
23+
number % 100,
24+
number % 10,
25+
number
26+
from numbers(1000);
27+
28+
# Fixture table without a cluster key: same three int columns as
# clustered_side, but no cluster by clause.
statement ok
29+
create table plain_side(
30+
join_key int,
31+
filter_key int,
32+
payload int
33+
);
34+
35+
# Load the same 1000 rows as clustered_side (number % 100, number % 10, number),
# so the two tables differ only in the cluster key declaration.
statement ok
36+
insert into plain_side
37+
select
38+
number % 100,
39+
number % 10,
40+
number
41+
from numbers(1000);
42+
43+
# Analyze both fixture tables before the explain queries.
# NOTE(review): presumably this refreshes the statistics the join-order
# cost model reads — confirm.
statement ok
44+
analyze table clustered_side;
45+
46+
statement ok
47+
analyze table plain_side;
48+
49+
# Isolate the cluster-key filter discount from the TPC-H data shape.
50+
# With the old formula disabled by factor 0, equal cardinality keeps the plain
51+
# table as the build side. Factor 85 discounts the clustered-side filter and flips
52+
# the build side to the clustered table. Factor 100 is the neutral discount point:
53+
# it should not take the discounted path, so the plain table is the build side again.
54+
# Case: cost_factor_cluster_key = 0.
statement ok
55+
set cost_factor_cluster_key = 0;
56+
57+
# Both sides carry the same filter_key = 1 predicate and join on join_key.
# Golden plan: plain_side is the Build side, clustered_side is the Probe side.
query T
58+
explain join select *
59+
from clustered_side
60+
join plain_side on clustered_side.join_key = plain_side.join_key
61+
where clustered_side.filter_key = 1
62+
and plain_side.filter_key = 1;
63+
----
64+
HashJoin: INNER
65+
├── Build
66+
│ └── Scan: default.join_reorder_cluster_key_cost.plain_side (#1) (read rows: 1000)
67+
└── Probe
68+
└── Scan: default.join_reorder_cluster_key_cost.clustered_side (#0) (read rows: 1000)
69+
70+
# Case: cost_factor_cluster_key = 85.
statement ok
71+
set cost_factor_cluster_key = 85;
72+
73+
# Same query as the factor 0 case; only the setting changed.
# Golden plan: the sides are swapped — clustered_side is the Build side,
# plain_side is the Probe side.
query T
74+
explain join select *
75+
from clustered_side
76+
join plain_side on clustered_side.join_key = plain_side.join_key
77+
where clustered_side.filter_key = 1
78+
and plain_side.filter_key = 1;
79+
----
80+
HashJoin: INNER
81+
├── Build
82+
│ └── Scan: default.join_reorder_cluster_key_cost.clustered_side (#0) (read rows: 1000)
83+
└── Probe
84+
└── Scan: default.join_reorder_cluster_key_cost.plain_side (#1) (read rows: 1000)
85+
86+
# Case: cost_factor_cluster_key = 100.
statement ok
87+
set cost_factor_cluster_key = 100;
88+
89+
# Same query again. Golden plan matches the factor 0 case:
# plain_side is the Build side, clustered_side is the Probe side.
query T
90+
explain join select *
91+
from clustered_side
92+
join plain_side on clustered_side.join_key = plain_side.join_key
93+
where clustered_side.filter_key = 1
94+
and plain_side.filter_key = 1;
95+
----
96+
HashJoin: INNER
97+
├── Build
98+
│ └── Scan: default.join_reorder_cluster_key_cost.plain_side (#1) (read rows: 1000)
99+
└── Probe
100+
└── Scan: default.join_reorder_cluster_key_cost.clustered_side (#0) (read rows: 1000)
101+
102+
# Three-table fixture, part 1: table a, clustered by (k1, k2) in that order.
statement ok
103+
create or replace table a(k1 BIGINT, k2 BIGINT, v BIGINT) cluster by(k1, k2);
104+
105+
# 1000 rows where k1 = k2 = v = number.
statement ok
106+
insert into a
107+
select number, number, number
108+
from numbers(1000);
109+
110+
# Three-table fixture, part 2: b and c have the same columns as a but no
# cluster key, and are loaded with the same 1000 rows (k1 = k2 = v = number).
statement ok
111+
create or replace table b(k1 BIGINT, k2 BIGINT, v BIGINT);
112+
113+
statement ok
114+
insert into b
115+
select number, number, number
116+
from numbers(1000);
117+
118+
statement ok
119+
create or replace table c(k1 BIGINT, k2 BIGINT, v BIGINT);
120+
121+
statement ok
122+
insert into c
123+
select number, number, number
124+
from numbers(1000);
125+
126+
# Analyze a, b and c before the three-table explain queries.
statement ok
127+
analyze table a;
128+
129+
statement ok
130+
analyze table b;
131+
132+
statement ok
133+
analyze table c;
134+
135+
# All three-table cases below run with factor 85; the previous section left
# the setting at 100.
statement ok
136+
set cost_factor_cluster_key = 85;
137+
138+
# Mirrors the k1_k2_prefix case from cluster_key_join_order.rs: a is clustered by (k1, k2).
139+
# a joins b on k1 and c on k2.
# Golden plan: the inner join probes a against c (Build); the outer join
# then uses b as its Build side.
query T
140+
explain join
141+
SELECT *
142+
FROM a
143+
JOIN b ON a.k1 = b.k1
144+
JOIN c ON a.k2 = c.k2;
145+
----
146+
HashJoin: INNER
147+
├── Build
148+
│ └── Scan: default.join_reorder_cluster_key_cost.b (#1) (read rows: 1000)
149+
└── Probe
150+
└── HashJoin: INNER
151+
├── Build
152+
│ └── Scan: default.join_reorder_cluster_key_cost.c (#2) (read rows: 1000)
153+
└── Probe
154+
└── Scan: default.join_reorder_cluster_key_cost.a (#0) (read rows: 1000)
155+
156+
# Rebuild a with the cluster key columns in the opposite order, (k2, k1).
# Columns and data are unchanged; the table is analyzed again after reload.
statement ok
157+
create or replace table a(k1 BIGINT, k2 BIGINT, v BIGINT) cluster by(k2, k1);
158+
159+
statement ok
160+
insert into a
161+
select number, number, number
162+
from numbers(1000);
163+
164+
statement ok
165+
analyze table a;
166+
167+
# Mirrors the k2_k1_prefix case from cluster_key_join_order.rs: a is now clustered by (k2, k1).
168+
# Same query text as the (k1, k2) case.
# Golden plan: b and c trade places — the inner join probes a against b (Build);
# the outer join then uses c as its Build side.
query T
169+
explain join
170+
SELECT *
171+
FROM a
172+
JOIN b ON a.k1 = b.k1
173+
JOIN c ON a.k2 = c.k2;
174+
----
175+
HashJoin: INNER
176+
├── Build
177+
│ └── Scan: default.join_reorder_cluster_key_cost.c (#2) (read rows: 1000)
178+
└── Probe
179+
└── HashJoin: INNER
180+
├── Build
181+
│ └── Scan: default.join_reorder_cluster_key_cost.b (#1) (read rows: 1000)
182+
└── Probe
183+
└── Scan: default.join_reorder_cluster_key_cost.a (#0) (read rows: 1000)
184+
185+
# Restore a to the (k1, k2) cluster key order for the next case, reloading
# the same 1000 rows and analyzing the table again.
statement ok
186+
create or replace table a(k1 BIGINT, k2 BIGINT, v BIGINT) cluster by(k1, k2);
187+
188+
statement ok
189+
insert into a
190+
select number, number, number
191+
from numbers(1000);
192+
193+
statement ok
194+
analyze table a;
195+
196+
# Mirrors the filter_preserves_cluster_keys case from cluster_key_join_order.rs: a (clustered by (k1, k2)) is wrapped in a filtering subquery.
197+
# a is read through a subquery with the predicate v >= 0, which every loaded
# row satisfies (v = number from numbers(1000)).
# Golden plan: identical shape to the unfiltered (k1, k2) case — inner join
# builds on c, outer join builds on b, a is the innermost Probe.
query T
198+
explain join
199+
SELECT *
200+
FROM (SELECT * FROM a WHERE v >= 0) a
201+
JOIN b ON a.k1 = b.k1
202+
JOIN c ON a.k2 = c.k2;
203+
----
204+
HashJoin: INNER
205+
├── Build
206+
│ └── Scan: default.join_reorder_cluster_key_cost.b (#1) (read rows: 1000)
207+
└── Probe
208+
└── HashJoin: INNER
209+
├── Build
210+
│ └── Scan: default.join_reorder_cluster_key_cost.c (#2) (read rows: 1000)
211+
└── Probe
212+
└── Scan: default.join_reorder_cluster_key_cost.a (#0) (read rows: 1000)
213+
214+
# Teardown: unset cost_factor_cluster_key, which the cases above set to 0, 85 and 100.
# NOTE(review): ddl_column_type_nullable, set to 0 at the top of this file, is
# not unset here, and the join_reorder_cluster_key_cost database is not dropped —
# confirm whether the test runner resets session state between files.
statement ok
215+
unset cost_factor_cluster_key;

0 commit comments

Comments
 (0)