Skip to content

Commit c575989

Browse files
committed
chore: wire time functions + slice, shift
1 parent b993c0f commit c575989

9 files changed

Lines changed: 287 additions & 17 deletions

File tree

docs/source/contributor-guide/spark_expressions_support.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@
151151
- [x] get
152152
- [ ] sequence
153153
- [ ] shuffle
154-
- [ ] slice
154+
- [x] slice
155155
- [x] sort_array
156156

157157
### bitwise_funcs
@@ -165,7 +165,7 @@
165165
- [x] bit_get
166166
- [x] getbit
167167
- [x] shiftright
168-
- [ ] shiftrightunsigned
168+
- [x] shiftrightunsigned
169169
- [x] `|`
170170
- [x] `~`
171171

@@ -214,7 +214,7 @@
214214

215215
### datetime_funcs
216216

217-
- [ ] add_months
217+
- [x] add_months
218218
- [ ] convert_timezone
219219
- [ ] curdate
220220
- [ ] current_date
@@ -286,9 +286,9 @@
286286
- [ ] try_to_time
287287
- [ ] try_to_timestamp
288288
- [x] unix_date
289-
- [ ] unix_micros
290-
- [ ] unix_millis
291-
- [ ] unix_seconds
289+
- [x] unix_micros
290+
- [x] unix_millis
291+
- [x] unix_seconds
292292
- [x] unix_timestamp
293293
- [x] weekday
294294
- [x] weekofyear

docs/source/user-guide/latest/expressions.md

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ of expressions that be disabled.
101101

102102
| Expression | SQL |
103103
| ---------------- | ---------------------------- |
104+
| AddMonths | `add_months` |
104105
| CurrentTimeZone | `current_timezone` |
105106
| DateAdd | `date_add` |
106107
| DateDiff | `datediff` |
@@ -122,6 +123,9 @@ of expressions that be disabled.
122123
| TruncDate | `trunc` |
123124
| TruncTimestamp | `date_trunc` |
124125
| UnixDate | `unix_date` |
126+
| UnixMicros | `unix_micros` |
127+
| UnixMillis | `unix_millis` |
128+
| UnixSeconds | `unix_seconds` |
125129
| UnixTimestamp | `unix_timestamp` |
126130
| Year | `year` |
127131
| Month | `month` |
@@ -201,16 +205,17 @@ of expressions that be disabled.
201205

202206
## Bitwise Expressions
203207

204-
| Expression | SQL |
205-
| ------------ | ---- |
206-
| BitwiseAnd | `&` |
207-
| BitwiseCount | |
208-
| BitwiseGet | |
209-
| BitwiseOr | `\|` |
210-
| BitwiseNot | `~` |
211-
| BitwiseXor | `^` |
212-
| ShiftLeft | `<<` |
213-
| ShiftRight | `>>` |
208+
| Expression | SQL |
209+
| ------------------ | ----- |
210+
| BitwiseAnd | `&` |
211+
| BitwiseCount | |
212+
| BitwiseGet | |
213+
| BitwiseOr | `\|` |
214+
| BitwiseNot | `~` |
215+
| BitwiseXor | `^` |
216+
| ShiftLeft | `<<` |
217+
| ShiftRight | `>>` |
218+
| ShiftRightUnsigned | `>>>` |
214219

215220
## Aggregate Expressions
216221

@@ -281,6 +286,7 @@ Comet supports using the following aggregate functions within window contexts wi
281286
| Flatten |
282287
| GetArrayItem |
283288
| Size |
289+
| Slice |
284290
| SortArray |
285291

286292
## Map Expressions

native/core/src/execution/jni_api.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,19 @@ use datafusion::{
4343
};
4444
use datafusion_comet_proto::spark_operator::Operator;
4545
use datafusion_spark::function::array::array_contains::SparkArrayContains;
46+
use datafusion_spark::function::array::slice::SparkSlice;
4647
use datafusion_spark::function::bitwise::bit_count::SparkBitCount;
4748
use datafusion_spark::function::bitwise::bit_get::SparkBitGet;
49+
use datafusion_spark::function::bitwise::bit_shift::SparkBitShift;
4850
use datafusion_spark::function::bitwise::bitwise_not::SparkBitwiseNot;
51+
use datafusion_spark::function::datetime::add_months::SparkAddMonths;
4952
use datafusion_spark::function::datetime::date_add::SparkDateAdd;
5053
use datafusion_spark::function::datetime::date_sub::SparkDateSub;
5154
use datafusion_spark::function::datetime::from_utc_timestamp::SparkFromUtcTimestamp;
5255
use datafusion_spark::function::datetime::last_day::SparkLastDay;
5356
use datafusion_spark::function::datetime::next_day::SparkNextDay;
5457
use datafusion_spark::function::datetime::to_utc_timestamp::SparkToUtcTimestamp;
58+
use datafusion_spark::function::datetime::unix::SparkUnixTimestamp;
5559
use datafusion_spark::function::hash::crc32::SparkCrc32;
5660
use datafusion_spark::function::hash::sha1::SparkSha1;
5761
use datafusion_spark::function::hash::sha2::SparkSha2;
@@ -599,6 +603,12 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
599603
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkTryUrlDecode::default()));
600604
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCsc::default()));
601605
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkFactorial::default()));
606+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkAddMonths::default()));
607+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSlice::default()));
608+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitShift::right_unsigned()));
609+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkUnixTimestamp::microseconds()));
610+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkUnixTimestamp::milliseconds()));
611+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkUnixTimestamp::seconds()));
602612
}
603613

604614
/// Prepares arrow arrays for output.

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
6363
classOf[ArrayPosition] -> CometArrayPosition,
6464
classOf[ArrayRemove] -> CometArrayRemove,
6565
classOf[ArrayRepeat] -> CometArrayRepeat,
66+
classOf[Slice] -> CometScalarFunction("slice"),
6667
classOf[SortArray] -> CometSortArray,
6768
classOf[ArraysOverlap] -> CometArraysOverlap,
6869
classOf[ArrayUnion] -> CometArrayUnion,
@@ -213,10 +214,12 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
213214
classOf[BitwiseNot] -> CometBitwiseNot,
214215
classOf[BitwiseXor] -> CometBitwiseXor,
215216
classOf[ShiftLeft] -> CometShiftLeft,
216-
classOf[ShiftRight] -> CometShiftRight)
217+
classOf[ShiftRight] -> CometShiftRight,
218+
classOf[ShiftRightUnsigned] -> CometScalarFunction("shiftrightunsigned"))
217219

218220
private[comet] val temporalExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] =
219221
Map(
222+
classOf[AddMonths] -> CometScalarFunction("add_months"),
220223
classOf[DateAdd] -> CometDateAdd,
221224
classOf[DateDiff] -> CometDateDiff,
222225
classOf[DateFormatClass] -> CometDateFormat,
@@ -225,6 +228,9 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
225228
classOf[Hours] -> CometHours,
226229
classOf[DateSub] -> CometDateSub,
227230
classOf[UnixDate] -> CometUnixDate,
231+
classOf[UnixMicros] -> CometScalarFunction("unix_micros"),
232+
classOf[UnixMillis] -> CometScalarFunction("unix_millis"),
233+
classOf[UnixSeconds] -> CometScalarFunction("unix_seconds"),
228234
classOf[FromUnixTime] -> CometFromUnixTime,
229235
classOf[FromUTCTimestamp] -> CometFromUTCTimestamp,
230236
classOf[ToUTCTimestamp] -> CometToUTCTimestamp,
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ConfigMatrix: parquet.enable.dictionary=false,true
19+
20+
statement
21+
CREATE TABLE test_slice(arr array<int>, s int, l int) USING parquet
22+
23+
statement
24+
INSERT INTO test_slice VALUES
25+
(array(1, 2, 3, 4, 5), 2, 3),
26+
(array(1, 2, 3, 4, 5), 1, 5),
27+
(array(1, 2, 3, 4, 5), 1, 10),
28+
(array(1, 2, 3, 4, 5), 3, 0),
29+
(array(1, 2, 3, 4, 5), -2, 2),
30+
(array(1, 2, 3, 4, 5), -10, 2),
31+
(array(1, 2, 3, 4, 5), 10, 2),
32+
(array(1, NULL, 3, NULL, 5), 1, 5),
33+
(array(), 1, 3),
34+
(NULL, 1, 3),
35+
(array(1, 2, 3), NULL, 2),
36+
(array(1, 2, 3), 1, NULL)
37+
38+
-- column array, column start, column length
39+
query
40+
SELECT slice(arr, s, l) FROM test_slice
41+
42+
-- column array, literal start and length
43+
query
44+
SELECT slice(arr, 2, 2) FROM test_slice
45+
46+
-- column array, negative literal start
47+
query
48+
SELECT slice(arr, -1, 1) FROM test_slice
49+
50+
-- string element type
51+
statement
52+
CREATE TABLE test_slice_string(arr array<string>) USING parquet
53+
54+
statement
55+
INSERT INTO test_slice_string VALUES
56+
(array('a', 'b', 'c', 'd')),
57+
(array('é', '日本', '', 'x')),
58+
(array('a', NULL, 'c')),
59+
(NULL)
60+
61+
query
62+
SELECT slice(arr, 1, 2) FROM test_slice_string
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ConfigMatrix: parquet.enable.dictionary=false,true
19+
20+
-- Spark's ShiftRightUnsigned: first arg is Int or Long, second is Int.
21+
-- Returns the same integer type as the first argument. Shift amount is
22+
-- normalized to the bit width (Java semantics) for negative/large shifts.
23+
24+
statement
25+
CREATE TABLE test_shiftrightunsigned_int(v int, s int) USING parquet
26+
27+
statement
28+
INSERT INTO test_shiftrightunsigned_int VALUES
29+
(1, 1),
30+
(-1, 1),
31+
(8, 2),
32+
(2147483647, 1),
33+
(-2147483648, 1),
34+
(0, 0),
35+
(1, 0),
36+
(1, 31),
37+
(1, 32),
38+
(1, 33),
39+
(1, -1),
40+
(NULL, 1),
41+
(1, NULL)
42+
43+
query
44+
SELECT shiftrightunsigned(v, s) FROM test_shiftrightunsigned_int
45+
46+
statement
47+
CREATE TABLE test_shiftrightunsigned_long(v bigint, s int) USING parquet
48+
49+
statement
50+
INSERT INTO test_shiftrightunsigned_long VALUES
51+
(1, 1),
52+
(-1, 1),
53+
(9223372036854775807, 1),
54+
(-9223372036854775808, 1),
55+
(0, 0),
56+
(1, 63),
57+
(1, 64),
58+
(1, -1),
59+
(NULL, 1),
60+
(1, NULL)
61+
62+
query
63+
SELECT shiftrightunsigned(v, s) FROM test_shiftrightunsigned_long
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- The result is microseconds since epoch in UTC, so it must not depend on the
19+
-- session timezone.
20+
-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles
21+
22+
statement
23+
CREATE TABLE test_unix_micros(ts timestamp) USING parquet
24+
25+
statement
26+
INSERT INTO test_unix_micros VALUES
27+
(timestamp('1970-01-01 00:00:00')),
28+
(timestamp('2024-01-15 12:34:56.123456')),
29+
(timestamp('1969-12-31 23:59:59.999999')),
30+
(timestamp('9999-12-31 23:59:59.999999')),
31+
(timestamp('0001-01-01 00:00:00')),
32+
(NULL)
33+
34+
query
35+
SELECT unix_micros(ts) FROM test_unix_micros
36+
37+
-- literal arguments
38+
query
39+
SELECT unix_micros(timestamp('1970-01-01 00:00:00')),
40+
unix_micros(timestamp('2024-01-15 12:34:56.123456')),
41+
unix_micros(NULL)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- The result is milliseconds since epoch in UTC, so it must not depend on the
19+
-- session timezone.
20+
-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles
21+
22+
statement
23+
CREATE TABLE test_unix_millis(ts timestamp) USING parquet
24+
25+
statement
26+
INSERT INTO test_unix_millis VALUES
27+
(timestamp('1970-01-01 00:00:00')),
28+
(timestamp('2024-01-15 12:34:56.123456')),
29+
(timestamp('1969-12-31 23:59:59.999999')),
30+
(timestamp('9999-12-31 23:59:59.999999')),
31+
(timestamp('0001-01-01 00:00:00')),
32+
(NULL)
33+
34+
query
35+
SELECT unix_millis(ts) FROM test_unix_millis
36+
37+
-- literal arguments
38+
query
39+
SELECT unix_millis(timestamp('1970-01-01 00:00:00')),
40+
unix_millis(timestamp('2024-01-15 12:34:56.123456')),
41+
unix_millis(NULL)

0 commit comments

Comments
 (0)