Skip to content

Commit 2e8cc34

Browse files
authored
feat: Support Spark expression: convert_timezone (#4369)
1 parent b993c0f commit 2e8cc34

5 files changed

Lines changed: 74 additions & 2 deletions

File tree

docs/source/contributor-guide/spark_expressions_support.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@
215215
### datetime_funcs
216216

217217
- [ ] add_months
218-
- [ ] convert_timezone
218+
- [x] convert_timezone
219219
- [ ] curdate
220220
- [ ] current_date
221221
- [ ] current_time

docs/source/user-guide/latest/expressions.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ of expressions that be disabled.
101101

102102
| Expression | SQL |
103103
| ---------------- | ---------------------------- |
104+
| ConvertTimezone | `convert_timezone` |
104105
| CurrentTimeZone | `current_timezone` |
105106
| DateAdd | `date_add` |
106107
| DateDiff | `datediff` |

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
217217

218218
private[comet] val temporalExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] =
219219
Map(
220+
classOf[ConvertTimezone] -> CometConvertTimezone,
220221
classOf[DateAdd] -> CometDateAdd,
221222
classOf[DateDiff] -> CometDateDiff,
222223
classOf[DateFormatClass] -> CometDateFormat,

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ package org.apache.comet.serde
2121

2222
import java.util.Locale
2323

24-
import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, Minute, Month, NextDay, Quarter, Second, SecondsToTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year}
24+
import org.apache.spark.sql.catalyst.expressions.{Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, Minute, Month, NextDay, Quarter, Second, SecondsToTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year}
2525
import org.apache.spark.sql.internal.SQLConf
2626
import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
2727
import org.apache.spark.unsafe.types.UTF8String
@@ -409,6 +409,27 @@ object CometToUTCTimestamp extends CometExpressionSerde[ToUTCTimestamp] {
409409
}
410410
}
411411

412+
object CometConvertTimezone extends CometExpressionSerde[ConvertTimezone] {
413+
414+
override def getSupportLevel(expr: ConvertTimezone): SupportLevel =
415+
Incompatible(Some(UTCTimestampSerde.tzParseIncompatReason))
416+
417+
override def getIncompatibleReasons(): Seq[String] =
418+
Seq(UTCTimestampSerde.tzParseIncompatReason)
419+
420+
override def convert(
421+
expr: ConvertTimezone,
422+
inputs: Seq[Attribute],
423+
binding: Boolean): Option[ExprOuterClass.Expr] = {
424+
val srcTz = exprToProtoInternal(expr.sourceTz, inputs, binding)
425+
val tgtTz = exprToProtoInternal(expr.targetTz, inputs, binding)
426+
val ts = exprToProtoInternal(expr.sourceTs, inputs, binding)
427+
val toUtc = scalarFunctionExprToProto("to_utc_timestamp", ts, srcTz)
428+
val fromUtc = scalarFunctionExprToProto("from_utc_timestamp", toUtc, tgtTz)
429+
optExprWithInfo(fromUtc, expr, expr.children: _*)
430+
}
431+
}
432+
412433
object CometNextDay extends CometScalarFunction[NextDay]("next_day")
413434

414435
object CometMakeDate extends CometScalarFunction[MakeDate]("make_date")
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- Config: spark.comet.expression.ConvertTimezone.allowIncompatible=true
19+
-- ConfigMatrix: spark.sql.session.timeZone=UTC,America/Los_Angeles
20+
21+
statement
22+
CREATE TABLE test_convert_timezone(ts timestamp_ntz, src string, tgt string) USING parquet
23+
24+
statement
25+
INSERT INTO test_convert_timezone VALUES
26+
(timestamp_ntz'2021-12-06 08:00:00', 'UTC', 'America/Los_Angeles'),
27+
(timestamp_ntz'2021-07-01 12:00:00', 'America/New_York', 'Asia/Tokyo'),
28+
(timestamp_ntz'2023-01-15 09:30:00', 'America/Los_Angeles', 'UTC'),
29+
(NULL, 'UTC', 'Asia/Tokyo'),
30+
(timestamp_ntz'2021-12-06 08:00:00', NULL, 'Asia/Tokyo'),
31+
(timestamp_ntz'2021-12-06 08:00:00', 'UTC', NULL)
32+
33+
query
34+
SELECT convert_timezone('UTC', 'America/Los_Angeles', timestamp_ntz'2021-12-06 08:00:00')
35+
36+
query
37+
SELECT convert_timezone('Asia/Tokyo', 'Europe/Berlin', timestamp_ntz'2021-12-06 12:00:00')
38+
39+
query
40+
SELECT convert_timezone('America/Los_Angeles', 'Asia/Tokyo', timestamp_ntz'2023-01-15 20:00:00')
41+
42+
query
43+
SELECT convert_timezone(CAST(NULL AS STRING), 'Asia/Tokyo', timestamp_ntz'2021-12-06 08:00:00')
44+
45+
query
46+
SELECT convert_timezone('UTC', CAST(NULL AS STRING), timestamp_ntz'2021-12-06 08:00:00')
47+
48+
query
49+
SELECT convert_timezone(src, tgt, ts) FROM test_convert_timezone

0 commit comments

Comments
 (0)