@@ -17,60 +17,104 @@ create or replace table t(c int, s string) compression = 'lz4';
17
17
statement ok
18
18
insert into t_encoded(c, s) select number as c, to_string(number) as s from numbers(500000);
19
19
20
+ statement ok
21
+ optimize table t_encoded compact;
22
+
20
23
statement ok
21
24
insert into t(c, s) select number as c, to_string(number) as s from numbers(500000);
22
25
23
- query T
24
- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 't_encoded');
25
- ----
26
- 2.30 MiB
26
+ statement ok
27
+ optimize table t compact;
27
28
29
+ # In this case, lz4 with encoding produces smaller block files
28
30
query T
29
- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 't');
31
+ with
32
+ e as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 't_encoded') limit 1),
33
+ p as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 't') limit 1)
34
+ select e.c < p.c from e, p
30
35
----
31
- 3.91 MiB
36
+ 1
37
+
32
38
33
39
################################
34
40
# Alter table parquet encoding #
35
41
################################
36
42
43
+
44
+ # 1. prepare plain encoded data and keep the file size
37
45
statement ok
38
46
create or replace table tbl (c int, s string) compression = 'lz4';
39
47
40
48
statement ok
41
49
insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
42
50
43
- query T
44
- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
45
- ----
46
- 3.91 MiB
51
+ # insertion might be executed in a distributed manner, in which case data blocks might be fragmented, so compact them before recording the size
52
+ statement ok
53
+ optimize table tbl compact;
54
+
55
+ statement ok
56
+ create temp table tbl_size(s uint64);
57
+
58
+ statement ok
59
+ insert into tbl_size select bytes_compressed from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1;
60
+
61
+
62
+ # 2. truncate table data and insert the same data with parquet encoding enabled
63
+ statement ok
64
+ truncate table tbl;
47
65
48
66
statement ok
49
67
ALTER TABLE tbl SET OPTIONS (enable_parquet_encoding = 'true');
50
68
51
69
statement ok
52
70
insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
53
71
54
- # newly created block should be smaller, since enable_parquet_encoding is 'true'
72
+ # insertion might be executed in a distributed manner, in which case data blocks might be fragmented; let's compact them
73
+ statement ok
74
+ optimize table tbl compact;
75
+
76
+
77
+ # 3. check that file size of newly created blocks with encoding is smaller
78
+
55
79
query T
56
- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
80
+ with
81
+ e as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1),
82
+ p as (select s as c from tbl_size)
83
+ select e.c < p.c from e,p
57
84
----
58
- 2.30 MiB
59
- 3.91 MiB
85
+ 1
86
+
87
+ # keep the encoded size; it is compared against the plain (non-encoded) size in the final check of this section
88
+ statement ok
89
+ create temp table e_tbl_size(s uint64);
90
+
91
+ statement ok
92
+ insert into e_tbl_size select bytes_compressed from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1;
93
+
94
+ # 4. check that table option `enable_parquet_encoding` could be turned off
95
+
96
+ statement ok
97
+ truncate table tbl;
60
98
61
99
statement ok
62
100
ALTER TABLE tbl SET OPTIONS (enable_parquet_encoding = 'false');
63
101
64
102
statement ok
65
103
insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
66
104
67
- # newly created block should be larger, since enable_parquet_encoding is 'false'
105
+ statement ok
106
+ optimize table tbl compact;
107
+
108
+
109
+ # 5. check that file size of newly created blocks without encoding is larger than the encoded size recorded earlier
68
110
query T
69
- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
111
+ with
112
+ p as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1),
113
+ e as (select s as c from e_tbl_size)
114
+ select e.c < p.c from e,p
70
115
----
71
- 3.91 MiB
72
- 2.30 MiB
73
- 3.91 MiB
116
+ 1
117
+
74
118
75
119
# Test invalid option value
76
120
0 commit comments