Skip to content

Commit 7076078

Browse files
Merge pull request #149 from apache/null_handling
aggs produce null with null input
2 parents 3bf334f + a23786d commit 7076078

28 files changed

+206 / -192 lines changed

cpc/sqlx/cpc_sketch_agg_int64_lgk_seed.sqlx

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,11 +89,10 @@ export function serialize(state) {
8989
}
9090
} else if (state.union != null) {
9191
state.serialized = state.union.getResultAsUint8Array();
92-
} else {
93-
if (state.sketch == null) {
94-
state.sketch = new Module.cpc_sketch(state.lg_k, state.seed);
95-
}
92+
} else if (state.sketch != null) {
9693
state.serialized = state.sketch.serializeAsUint8Array();
94+
} else {
95+
state.serialized = null;
9796
}
9897
return {
9998
lg_k: state.lg_k,

cpc/sqlx/cpc_sketch_agg_string_lgk_seed.sqlx

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,11 +89,10 @@ export function serialize(state) {
8989
}
9090
} else if (state.union != null) {
9191
state.serialized = state.union.getResultAsUint8Array();
92-
} else {
93-
if (state.sketch == null) {
94-
state.sketch = new Module.cpc_sketch(state.lg_k, state.seed);
95-
}
92+
} else if (state.sketch != null) {
9693
state.serialized = state.sketch.serializeAsUint8Array();
94+
} else {
95+
state.serialized = null;
9796
}
9897
return {
9998
lg_k: state.lg_k,

cpc/sqlx/cpc_sketch_agg_union_lgk_seed.sqlx

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,19 +78,30 @@ export function aggregate(state, sketch) {
7878

7979
export function serialize(state) {
8080
if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize
81-
ensureUnion(state);
81+
if (state.union != null) {
82+
// for prior transition deserialize-aggregate
83+
// merge aggregated and serialized state
84+
if (state.serialized != null) {
85+
state.union.updateWithBytes(state.serialized, state.seed);
86+
}
87+
state.serialized = state.union.getResultAsUint8Array();
88+
} else {
89+
state.serialized = null;
90+
}
8291
try {
8392
return {
8493
lg_k: state.lg_k,
8594
seed: state.seed,
86-
serialized: state.union.getResultAsUint8Array()
95+
serialized: state.serialized
8796
};
8897
} catch (e) {
8998
if (e.message != null) throw e;
9099
throw new Error(Module.getExceptionMessage(e));
91100
} finally {
92-
state.union.delete();
93-
state.union = null;
101+
if (state.union != null) {
102+
state.union.delete();
103+
delete state.union;
104+
}
94105
}
95106
}
96107

cpc/test/cpc_sketch_test.js

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,24 +20,22 @@ const { generate_udf_test, generate_udaf_test } = unit_test_utils;
2020

2121
// using defaults
2222

23-
const cpc_empty = `FROM_BASE64('AgEQCwAGzJM=')`;
24-
2523
generate_udaf_test("cpc_sketch_agg_string", {
2624
input_columns: [`str`],
2725
input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`,
28-
expected_output: cpc_empty
26+
expected_output: null
2927
});
3028

3129
generate_udaf_test("cpc_sketch_agg_int64", {
3230
input_columns: [`value`],
3331
input_rows: `SELECT * FROM UNNEST([NULL, NULL, NULL]) AS value`,
34-
expected_output: cpc_empty
32+
expected_output: null
3533
});
3634

3735
generate_udaf_test("cpc_sketch_agg_union", {
3836
input_columns: [`sketch`],
3937
input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`,
40-
expected_output: cpc_empty
38+
expected_output: null
4139
});
4240

4341
const cpc_1 = `FROM_BASE64('CAEQCwAOzJMDAAAAAgAAAAAAAAAA+p9AiIAEKIABCEC+FRhuAwAAAA==')`;

fi/sqlx/frequent_strings_sketch_build.sqlx

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -58,24 +58,26 @@ export function aggregate(state, item, weight) {
5858
export function serialize(state) {
5959
if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize
6060
try {
61-
// for prior transition deserialize-aggregate
62-
// merge aggregated and serialized state
63-
if (state.sketch != null && state.serialized != null) {
64-
state.sketch.merge(state.serialized);
61+
if (state.sketch != null) {
62+
// for prior transition deserialize-aggregate
63+
// merge aggregated and serialized state
64+
if (state.serialized != null) state.sketch.merge(state.serialized);
65+
state.serialized = state.sketch.serializeAsUint8Array();
6566
} else {
66-
if (state.sketch == null) {
67-
state.sketch = new Module.frequent_strings_sketch(state.lg_max_map_size);
68-
}
67+
state.serialized = null;
6968
}
7069
return {
7170
lg_max_map_size: state.lg_max_map_size,
72-
serialized: state.sketch.serializeAsUint8Array()
71+
serialized: state.serialized
7372
};
7473
} catch (e) {
7574
if (e.message != null) throw e;
7675
throw new Error(Module.getExceptionMessage(e));
7776
} finally {
78-
state.sketch.delete();
77+
if (state.sketch != null) {
78+
state.sketch.delete();
79+
delete state.sketch;
80+
}
7981
}
8082
}
8183

fi/sqlx/frequent_strings_sketch_merge.sqlx

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -57,24 +57,26 @@ export function aggregate(state, sketch) {
5757
export function serialize(state) {
5858
if (state.sketch == null && state.serialized != null) return state; // for transition deserialize-serialize
5959
try {
60-
// for prior transition deserialize-aggregate
61-
// merge aggregated and serialized state
62-
if (state.sketch != null && state.serialized != null) {
63-
state.sketch.merge(state.serialized);
60+
if (state.sketch != null) {
61+
// for prior transition deserialize-aggregate
62+
// merge aggregated and serialized state
63+
if (state.serialized != null) state.sketch.merge(state.serialized);
64+
state.serialized = state.sketch.serializeAsUint8Array();
6465
} else {
65-
if (state.sketch == null) {
66-
state.sketch = new Module.frequent_strings_sketch(state.lg_max_map_size);
67-
}
66+
state.serialized = null;
6867
}
6968
return {
7069
lg_max_map_size: state.lg_max_map_size,
71-
serialized: state.sketch.serializeAsUint8Array()
70+
serialized: state.serialized
7271
};
7372
} catch (e) {
7473
if (e.message != null) throw e;
7574
throw new Error(Module.getExceptionMessage(e));
7675
} finally {
77-
state.sketch.delete();
76+
if (state.sketch != null) {
77+
state.sketch.delete();
78+
delete state.sketch;
79+
}
7880
}
7981
}
8082

fi/test/frequent_strings_sketch_test.js

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,18 +18,16 @@
1818

1919
const { generate_udf_test, generate_udaf_test } = unit_test_utils;
2020

21-
const fi_empty = `FROM_BASE64('AQEKBQMFAAA=')`;
22-
2321
generate_udaf_test("frequent_strings_sketch_build", {
2422
input_columns: [`str`, `1`, `5 NOT AGGREGATE`],
2523
input_rows: `SELECT * FROM UNNEST([CAST(NULL AS STRING), CAST(NULL AS STRING), CAST(NULL AS STRING)]) AS str`,
26-
expected_output: fi_empty
24+
expected_output: null
2725
});
2826

2927
generate_udaf_test("frequent_strings_sketch_merge", {
3028
input_columns: [`sketch`, `5 NOT AGGREGATE`],
3129
input_rows: `SELECT * FROM UNNEST([CAST(NULL AS BYTES), CAST(NULL AS BYTES), CAST(NULL AS BYTES)]) AS sketch`,
32-
expected_output: fi_empty
30+
expected_output: null
3331
});
3432

3533
const fi_1 = `FROM_BASE64('BAEKBQMAAAADAAAAAAAAAAMAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAYQEAAABiAQAAAGM=')`;

hll/sqlx/hll_sketch_agg_int64_lgk_type.sqlx

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,11 +88,10 @@ export function serialize(state) {
8888
}
8989
} else if (state.union != null) {
9090
state.serialized = state.union.getResultAsUint8Array(state.tgt_type);
91-
} else {
92-
if (state.sketch == null) {
93-
state.sketch = new Module.hll_sketch(state.lg_k, state.tgt_type);
94-
}
91+
} else if (state.sketch != null) {
9592
state.serialized = state.sketch.serializeAsUint8Array();
93+
} else {
94+
state.serialized = null;
9695
}
9796
return {
9897
lg_k: state.lg_k,

hll/sqlx/hll_sketch_agg_string_lgk_type.sqlx

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,11 +88,10 @@ export function serialize(state) {
8888
}
8989
} else if (state.union != null) {
9090
state.serialized = state.union.getResultAsUint8Array(state.tgt_type);
91-
} else {
92-
if (state.sketch == null) {
93-
state.sketch = new Module.hll_sketch(state.lg_k, state.tgt_type);
94-
}
91+
} else if (state.sketch != null) {
9592
state.serialized = state.sketch.serializeAsUint8Array();
93+
} else {
94+
state.serialized = null;
9695
}
9796
return {
9897
lg_k: state.lg_k,

hll/sqlx/hll_sketch_agg_union_lgk_type.sqlx

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -77,19 +77,28 @@ export function aggregate(state, sketch) {
7777

7878
export function serialize(state) {
7979
if (state.union == null && state.serialized != null) return state; // for transition deserialize-serialize
80-
ensureUnion(state);
80+
if (state.union != null) {
81+
if (state.serialized != null) {
82+
state.union.updateWithBytes(state.serialized);
83+
}
84+
state.serialized = state.union.getResultAsUint8Array(state.tgt_type);
85+
} else {
86+
state.serialized = null;
87+
}
8188
try {
8289
return {
8390
lg_k: state.lg_k,
8491
tgt_type: state.tgt_type,
85-
serialized: state.union.getResultAsUint8Array(state.tgt_type)
92+
serialized: state.serialized
8693
};
8794
} catch (e) {
8895
if (e.message != null) throw e;
8996
throw new Error(Module.getExceptionMessage(e));
9097
} finally {
91-
state.union.delete();
92-
state.union = null;
98+
if (state.union != null) {
99+
state.union.delete();
100+
delete state.union;
101+
}
93102
}
94103
}
95104

0 commit comments

Comments
 (0)