Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions spark/src/main/scala/org/apache/comet/serde/aggregates.scala
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ object CometAverage extends CometAggregateExpressionSerde[Average] {

object CometSum extends CometAggregateExpressionSerde[Sum] {

override def getIncompatibleReasons(): Seq[String] = Seq("Falls back to Spark in ANSI mode.")

Comment thread
coderfender marked this conversation as resolved.
override def convert(
aggExpr: AggregateExpression,
sum: Sum,
Expand Down
4 changes: 0 additions & 4 deletions spark/src/main/scala/org/apache/comet/serde/strings.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInter

object CometStringRepeat extends CometExpressionSerde[StringRepeat] {

override def getCompatibleNotes(): Seq[String] = Seq(
"A negative argument for the number of times to repeat throws an exception" +
" instead of returning an empty string as Spark does")

override def convert(
expr: StringRepeat,
inputs: Seq[Attribute],
Expand Down
35 changes: 28 additions & 7 deletions spark/src/main/scala/org/apache/comet/serde/unixtime.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package org.apache.comet.serde

import org.apache.spark.sql.catalyst.expressions.{Attribute, FromUnixTime, Literal}
import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.types.StringType

import org.apache.comet.CometSparkSessionExtensions.withInfo
import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
Expand All @@ -29,17 +30,40 @@ import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithIn
// https://github.com/apache/datafusion/issues/16594
object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {

override def getUnsupportedReasons(): Seq[String] = Seq(
"Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`")

override def getIncompatibleReasons(): Seq[String] = Seq(
"Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`." +
" DataFusion's valid timestamp range differs from Spark" +
"DataFusion's valid timestamp range differs from Spark" +
" (https://github.com/apache/datafusion/issues/16594)")

override def getSupportLevel(expr: FromUnixTime): SupportLevel = Incompatible(None)
override def getSupportLevel(expr: FromUnixTime): SupportLevel = {
expr.format match {
case Literal(null, _) =>
Compatible(None)
case Literal(fmt, _) =>
val formatStr = fmt.toString
Comment thread
coderfender marked this conversation as resolved.
val defaultPattern = TimestampFormatter.defaultPattern
if (formatStr == defaultPattern) {
Incompatible(None)
} else {
Comment thread
coderfender marked this conversation as resolved.
Unsupported(Some(s"Datetime pattern format: $formatStr is unsupported"))
}
case _ =>
Unsupported(Some("Non-literal datetime pattern format is unsupported"))
}
Comment thread
coderfender marked this conversation as resolved.
}

override def convert(
expr: FromUnixTime,
inputs: Seq[Attribute],
binding: Boolean): Option[ExprOuterClass.Expr] = {
expr.format match {
case Literal(null, _) =>
// from_unixtime is null-intolerant, so NULL format yields NULL
return exprToProtoInternal(Literal.create(null, StringType), inputs, binding)
case _ =>
}
val secExpr = exprToProtoInternal(expr.sec, inputs, binding)
// TODO: DataFusion toChar does not support Spark datetime pattern format
// https://github.com/apache/datafusion/issues/16577
Expand All @@ -48,10 +72,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {
val formatExpr = exprToProtoInternal(Literal("%Y-%m-%d %H:%M:%S"), inputs, binding)
val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding)

if (expr.format != Literal(TimestampFormatter.defaultPattern)) {
withInfo(expr, "Datetime pattern format is unsupported")
None
} else if (secExpr.isDefined && formatExpr.isDefined) {
if (secExpr.isDefined && formatExpr.isDefined) {
val timestampExpr =
scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*)
val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,22 @@ INSERT INTO test_from_unix_time VALUES (0), (1718451045), (-1), (NULL), (2147483
query expect_fallback(not fully compatible with Spark)
SELECT from_unixtime(t) FROM test_from_unix_time

query expect_fallback(not fully compatible with Spark)
query expect_fallback(Datetime pattern format: yyyy-MM-dd is unsupported)
SELECT from_unixtime(t, 'yyyy-MM-dd') FROM test_from_unix_time
Comment thread
coderfender marked this conversation as resolved.

-- literal arguments
query expect_fallback(not fully compatible with Spark)
SELECT from_unixtime(0)

query expect_fallback(not fully compatible with Spark)
query expect_fallback(Datetime pattern format: yyyy-MM-dd is unsupported)
SELECT from_unixtime(1718451045, 'yyyy-MM-dd')

-- null format literal
query
SELECT from_unixtime(t, NULL) FROM test_from_unix_time

query
SELECT from_unixtime(1718451045, NULL)

query
SELECT from_unixtime(1718451045, CAST(NULL AS STRING))
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,15 @@ SELECT repeat('hi', n) FROM test_repeat
-- literal + literal
query
SELECT repeat('hi', 3), repeat('', 5), repeat('a', 0), repeat(NULL, 3)

-- non-positive literal count
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add non-literal test as well - that does not seem to be covered in the code change

query
SELECT repeat('namaste', -1), repeat('namaste', -100), repeat('a', 0), repeat(NULL, -1)

-- non-positive literal count over a column
query
SELECT repeat(s, -1), repeat(s, 0) FROM test_repeat

-- non-literal count (column expression)
query
SELECT repeat(s, -n) FROM test_repeat
Loading