fix: re-enable tests skipped for Spark 4.1 (issue #4098) (#4253)

andygrove · web-flow · commit 22b7bed1a481 · 2026-05-06T19:09:02.000-06:00
diff --git a/native/common/src/error.rs b/native/common/src/error.rs
@@ -78,7 +78,7 @@ pub enum SparkError {
     #[error("[DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
     DivideByZero,
 
-    #[error("[REMAINDER_BY_ZERO] Division by zero. Use `try_remainder` to tolerate divisor being 0 and return NULL instead. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
+    #[error("[REMAINDER_BY_ZERO] Remainder by zero. Use `try_mod` to tolerate divisor being 0 and return NULL instead. If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.")]
     RemainderByZero,
 
     #[error("[INTERVAL_DIVIDED_BY_ZERO] Divide by zero in interval arithmetic.")]
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
@@ -132,3 +132,7 @@ pub(crate) fn decimal_sum_overflow_error(function_name: &str) -> SparkError {
 pub(crate) fn divide_by_zero_error() -> SparkError {
     SparkError::DivideByZero
 }
+
+pub(crate) fn remainder_by_zero_error() -> SparkError {
+    SparkError::RemainderByZero
+}
diff --git a/native/spark-expr/src/math_funcs/modulo_expr.rs b/native/spark-expr/src/math_funcs/modulo_expr.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::{create_comet_physical_fun, IfExpr};
-use crate::{divide_by_zero_error, Cast, EvalMode, SparkCastOptions};
+use crate::{remainder_by_zero_error, Cast, EvalMode, SparkCastOptions};
 use arrow::compute::kernels::numeric::rem;
 use arrow::datatypes::*;
 use datafusion::common::{exec_err, internal_err, DataFusionError, Result, ScalarValue};
@@ -56,8 +56,8 @@ pub fn spark_modulo(args: &[ColumnarValue], fail_on_error: bool) -> Result<Colum
     match apply(lhs, rhs, rem) {
         Ok(result) => Ok(result),
         Err(e) if e.to_string().contains("Divide by zero") && fail_on_error => {
-            // Return Spark-compliant divide by zero error.
-            Err(divide_by_zero_error().into())
+            // Return Spark-compliant remainder by zero error.
+            Err(remainder_by_zero_error().into())
         }
         Err(e) => Err(e),
     }
@@ -245,7 +245,7 @@ mod tests {
                     assert!(
                         error
                             .to_string()
-                            .contains("[DIVIDE_BY_ZERO] Division by zero"),
+                            .contains("[REMAINDER_BY_ZERO] Remainder by zero"),
                         "Error message did not match. Actual message: {error}"
                     );
                 }
diff --git a/spark/src/main/spark-3.4/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala b/spark/src/main/spark-3.4/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala
@@ -78,11 +78,7 @@ trait ShimSparkErrorConverter {
         Some(QueryExecutionErrors.divideByZeroError(sqlCtx(context)))
 
       case "RemainderByZero" =>
-        Some(
-          new SparkException(
-            errorClass = "REMAINDER_BY_ZERO",
-            messageParameters = params.map { case (k, v) => (k, v.toString) },
-            cause = null))
+        Some(QueryExecutionErrors.divideByZeroError(sqlCtx(context)))
 
       case "IntervalDividedByZero" =>
         Some(QueryExecutionErrors.intervalDividedByZeroError(sqlCtx(context)))
diff --git a/spark/src/main/spark-3.5/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala b/spark/src/main/spark-3.5/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala
@@ -78,11 +78,7 @@ trait ShimSparkErrorConverter {
         Some(QueryExecutionErrors.divideByZeroError(sqlCtx(context)))
 
       case "RemainderByZero" =>
-        Some(
-          new SparkException(
-            errorClass = "REMAINDER_BY_ZERO",
-            messageParameters = params.map { case (k, v) => (k, v.toString) },
-            cause = null))
+        Some(QueryExecutionErrors.divideByZeroError(sqlCtx(context)))
 
       case "IntervalDividedByZero" =>
         Some(QueryExecutionErrors.intervalDividedByZeroError(sqlCtx(context)))
diff --git a/spark/src/main/spark-4.x/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala b/spark/src/main/spark-4.x/org/apache/spark/sql/comet/shims/ShimSparkErrorConverter.scala
@@ -30,6 +30,8 @@ import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupporte
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 
+import org.apache.comet.CometSparkSessionExtensions.isSpark41Plus
+
 object ShimSparkErrorConverter {
   val ObjectLocationPattern: Regex = "Object at location (.+?) not found".r
 }
@@ -88,12 +90,15 @@ trait ShimSparkErrorConverter {
         Some(QueryExecutionErrors.divideByZeroError(context.headOption.orNull))
 
       case "RemainderByZero" =>
-        // SPARK 4.0 REMOVED remainderByZeroError  so we use generic arithmetic exception
-        Some(
-          new SparkException(
-            errorClass = "REMAINDER_BY_ZERO",
-            messageParameters = params.map { case (k, v) => (k, v.toString) },
-            cause = null))
+        if (isSpark41Plus) {
+          Some(
+            new SparkException(
+              errorClass = "REMAINDER_BY_ZERO",
+              messageParameters = Map("config" -> "\"spark.sql.ansi.enabled\""),
+              cause = null))
+        } else {
+          Some(QueryExecutionErrors.divideByZeroError(context.headOption.orNull))
+        }
 
       case "IntervalDividedByZero" =>
         Some(QueryExecutionErrors.intervalDividedByZeroError(context.headOption.orNull))
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -33,7 +33,6 @@ import org.apache.spark.sql.functions.{col, monotonically_increasing_id}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DataType, DataTypes, DateType, DecimalType, DoubleType, FloatType, IntegerType, LongType, ShortType, StringType, StructField, StructType, TimestampType}
 
-import org.apache.comet.CometSparkSessionExtensions.isSpark41Plus
 import org.apache.comet.expressions.{CometCast, CometEvalMode}
 import org.apache.comet.rules.CometScanTypeChecker
 import org.apache.comet.serde.{Compatible, Incompatible}
@@ -525,7 +524,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast FloatType to TimestampType") {
-    assume(!isSpark41Plus, "https://github.com/apache/datafusion-comet/issues/4098")
     representativeTimezones.foreach { tz =>
       withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
         // Use useDFDiff to avoid collect() which fails on extreme timestamp values
@@ -591,7 +589,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast DoubleType to TimestampType") {
-    assume(!isSpark41Plus, "https://github.com/apache/datafusion-comet/issues/4098")
     representativeTimezones.foreach { tz =>
       withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) {
         // Use useDFDiff to avoid collect() which fails on extreme timestamp values
@@ -1568,7 +1565,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast ArrayType to ArrayType") {
-    assume(!isSpark41Plus, "https://github.com/apache/datafusion-comet/issues/4098")
     val types = Seq(
       BooleanType,
       StringType,
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -1982,7 +1982,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("remainder function") {
-    assume(!isSpark41Plus, "https://github.com/apache/datafusion-comet/issues/4098")
     def withAnsiMode(enabled: Boolean)(f: => Unit): Unit = {
       withSQLConf(
         SQLConf.ANSI_ENABLED.key -> enabled.toString,
@@ -1992,6 +1991,8 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
     def verifyResult(query: String): Unit = {
       // Spark 4.1 introduced REMAINDER_BY_ZERO; older versions raise DIVIDE_BY_ZERO for `%`.
+      // Comet always raises REMAINDER_BY_ZERO natively, but the JVM shim maps it to
+      // DIVIDE_BY_ZERO on Spark < 4.1, where the REMAINDER_BY_ZERO error class does not exist.
       val expectedError =
         if (isSpark41Plus)
           "[REMAINDER_BY_ZERO] Remainder by zero. Use `try_mod` to tolerate divisor being 0 and return NULL instead."

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -132,3 +132,7 @@ pub(crate) fn decimal_sum_overflow_error(function_name: &str) -> SparkError {` |
| `132` | `132` | `pub(crate) fn divide_by_zero_error() -> SparkError {` |
| `133` | `133` | `    SparkError::DivideByZero` |
| `134` | `134` | `}` |
| | `135` | `+` |
| | `136` | `+pub(crate) fn remainder_by_zero_error() -> SparkError {` |
| | `137` | `+    SparkError::RemainderByZero` |
| | `138` | `+}` |