apache · comphead · Jun 3, 2026 · May 21, 2026 · May 30, 2026 · Jun 2, 2026
diff --git a/native/spark-expr/src/conversion_funcs/numeric.rs b/native/spark-expr/src/conversion_funcs/numeric.rs
@@ -134,7 +134,7 @@ macro_rules! cast_float_to_timestamp_impl {
 }
 
 macro_rules! cast_float_to_string {
-    ($from:expr, $eval_mode:expr, $type:ty, $output_type:ty, $offset_type:ty) => {{
+    ($from:expr, $eval_mode:expr, $type:ty, $output_type:ty, $offset_type:ty, $min_value:expr) => {{
 
         fn cast<OffsetSize>(
             from: &dyn Array,
@@ -173,20 +173,27 @@ macro_rules! cast_float_to_string {
                             if value.abs() >= UPPER_SCIENTIFIC_BOUND
                                 || value.abs() < LOWER_SCIENTIFIC_BOUND =>
                         {
-                            let formatted = format!("{value:E}");
-
-                            if formatted.contains(".") {
-                                Ok(Some(formatted))
+                            // Spark prints Java's Float.MIN_VALUE / Double.MIN_VALUE text for the
+                            // smallest subnormal; Rust's shortest `{:E}` form is 1E-45 / 5E-324.
+                            if value.abs().to_bits() == 1 {
+                                let sign = if value.is_sign_negative() { "-" } else { "" };
+                                Ok(Some(format!("{sign}{}", $min_value)))
                             } else {
-                                // `formatted` is already in scientific notation and can be split up by E
-                                // in order to add the missing trailing 0 which gets removed for numbers with a fraction of 0.0
-                                let prepare_number: Vec<&str> = formatted.split("E").collect();
+                                let formatted = format!("{value:E}");
+
+                                if formatted.contains(".") {
+                                    Ok(Some(formatted))
+                                } else {
+                                    // `formatted` is already in scientific notation and can be split up by E
+                                    // in order to add the missing trailing 0 which gets removed for numbers with a fraction of 0.0
+                                    let prepare_number: Vec<&str> = formatted.split("E").collect();
 
-                                let coefficient = prepare_number[0];
+                                    let coefficient = prepare_number[0];
 
-                                let exponent = prepare_number[1];
+                                    let exponent = prepare_number[1];
 
-                                Ok(Some(format!("{coefficient}.0E{exponent}")))
+                                    Ok(Some(format!("{coefficient}.0E{exponent}")))
+                                }
                             }
                         }
                         Some(value) => Ok(Some(value.to_string())),
@@ -650,7 +657,7 @@ pub(crate) fn spark_cast_float64_to_utf8<OffsetSize>(
 where
     OffsetSize: OffsetSizeTrait,
 {
-    cast_float_to_string!(from, _eval_mode, f64, Float64Array, OffsetSize)
+    cast_float_to_string!(from, _eval_mode, f64, Float64Array, OffsetSize, "4.9E-324")
 }
 
 pub(crate) fn spark_cast_float32_to_utf8<OffsetSize>(
@@ -660,7 +667,7 @@ pub(crate) fn spark_cast_float32_to_utf8<OffsetSize>(
 where
     OffsetSize: OffsetSizeTrait,
 {
-    cast_float_to_string!(from, _eval_mode, f32, Float32Array, OffsetSize)
+    cast_float_to_string!(from, _eval_mode, f32, Float32Array, OffsetSize, "1.4E-45")
 }
 
 fn cast_int_to_decimal128_internal<T>(
@@ -1117,6 +1124,27 @@ mod tests {
         assert!(result.is_err());
     }
 
+    /// The smallest subnormal f32/f64 (bit pattern 1) and its negation must be rendered
+    /// with the same digits as Java's `Float.MIN_VALUE` / `Double.MIN_VALUE`.
+    #[test]
+    fn test_spark_cast_float_min_value_to_string() {
+        // f32: bit pattern 1 is Float.MIN_VALUE.
+        let f32_min = f32::from_bits(1);
+        let singles: ArrayRef = Arc::new(Float32Array::from(vec![Some(f32_min), Some(-f32_min)]));
+        let cast_singles = spark_cast_float32_to_utf8::<i32>(&singles, EvalMode::Legacy).unwrap();
+        let single_text = cast_singles.as_string::<i32>();
+        assert_eq!(single_text.value(0), "1.4E-45");
+        assert_eq!(single_text.value(1), "-1.4E-45");
+
+        // f64: bit pattern 1 is Double.MIN_VALUE.
+        let f64_min = f64::from_bits(1);
+        let doubles: ArrayRef = Arc::new(Float64Array::from(vec![Some(f64_min), Some(-f64_min)]));
+        let cast_doubles = spark_cast_float64_to_utf8::<i32>(&doubles, EvalMode::Legacy).unwrap();
+        let double_text = cast_doubles.as_string::<i32>();
+        assert_eq!(double_text.value(0), "4.9E-324");
+        assert_eq!(double_text.value(1), "-4.9E-324");
+    }
+
     #[test]
     fn test_spark_cast_decimal_to_boolean() {
         let array: ArrayRef = Arc::new(

diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -151,8 +151,6 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
       case (ArrayType(DataTypes.DateType, _), ArrayType(toElementType, _))
           if toElementType != DataTypes.IntegerType && toElementType != DataTypes.StringType =>
         unsupported(fromType, toType)
-      case (dt: ArrayType, DataTypes.StringType) if dt.elementType == DataTypes.BinaryType =>
-        Incompatible()
       case (dt: ArrayType, DataTypes.StringType) =>
         isSupported(dt.elementType, DataTypes.StringType, timeZoneId, evalMode)
       case (dt: ArrayType, dt1: ArrayType) =>
@@ -243,11 +241,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
       case DataTypes.DateType => Compatible()
       case DataTypes.TimestampType => Compatible()
       case DataTypes.FloatType | DataTypes.DoubleType =>
-        Compatible(
-          Some(
-            "There can be differences in precision. " +
-              "For example, the input \"1.4E-45\" will produce 1.0E-45 " +
-              "instead of 1.4E-45"))
+        Compatible(Some("There can be differences in precision"))
       case d: DecimalType if d.scale < 0 =>
         // Negative-scale decimals require spark.sql.legacy.allowNegativeScaleOfDecimal=true.
         // When that config is enabled, Spark formats them using Java BigDecimal.toString()

diff --git a/spark/src/test/resources/sql-tests/expressions/cast/cast_array_to_string.sql b/spark/src/test/resources/sql-tests/expressions/cast/cast_array_to_string.sql
@@ -0,0 +1,69 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Covers cast(array<float|double|binary> as string): finite extremes, smallest subnormals,
+-- NaN and infinities, signed zeros, null elements, empty arrays, and a null binary array.
+-- Negative zero is spelled cast('-0.0' as ...) because the bare literal -0.0 is a decimal,
+-- which has no negative zero.
+
+statement
+CREATE TABLE test_cast_array_to_string(
+  id int,
+  floats array<float>,
+  doubles array<double>,
+  binaries array<binary>
+) USING parquet
+
+statement
+INSERT INTO test_cast_array_to_string VALUES
+  (
+    1,
+    array(cast('3.4028235E38' as float), cast('-3.4028235E38' as float), cast('1.4E-45' as float)),
+    array(cast('1.7976931348623157E308' as double), cast('-1.7976931348623157E308' as double), cast('4.9E-324' as double)),
+    array(X'616263', X'', X'0001027F')
+  ),
+  (
+    2,
+    array(cast('NaN' as float), cast('Infinity' as float), cast('-Infinity' as float)),
+    array(cast('NaN' as double), cast('Infinity' as double), cast('-Infinity' as double)),
+    array(cast(null as binary), X'FFFE', X'0A0D')
+  ),
+  (
+    3,
+    array(cast(null as float), cast('-0.0' as float), cast(0.0 as float)),
+    array(cast(null as double), cast('-0.0' as double), cast(0.0 as double)),
+    null
+  ),
+  (
+    4,
+    cast(array() as array<float>),
+    cast(array() as array<double>),
+    cast(array() as array<binary>)
+  )
+
+-- column arguments
+query
+SELECT cast(floats as string), cast(doubles as string), cast(binaries as string), id
+FROM test_cast_array_to_string
+ORDER BY id
+
+-- literal arguments
+query
+SELECT
+  cast(array(cast('1.4E-45' as float), cast('NaN' as float), cast(null as float)) as string),
+  cast(array(cast('4.9E-324' as double), cast('-Infinity' as double), cast(null as double)) as string),
+  cast(array(X'616263', X'', cast(null as binary)) as string)
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -1555,16 +1555,60 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       IntegerType,
       LongType,
       ShortType,
-      // FloatType,
-      // DoubleType,
-      // BinaryType
+      FloatType,
+      DoubleType,
+      BinaryType,
       DecimalType(10, 2),
       DecimalType(38, 18)).foreach { dt =>
       val input = generateArrays(100, dt)
       castTest(input, StringType, hasIncompatibleType = hasIncompatibleType(input.schema))
     }
   }
 
+  test("cast ArrayType to StringType - float double binary edge cases") {
+    import scala.jdk.CollectionConverters._
+
+    // Int literals are narrowed with toByte, so 0xff and -1 both denote the byte 0xFF.
+    def bytes(values: Int*): Array[Byte] = values.map(_.toByte).toArray
+
+    // Each entry below becomes one row of a single array column named "a";
+    // a null entry stands for a null array rather than a null element.
+    // Finite extremes, smallest subnormal, NaN/infinities, signed zeros, null element.
+    val floatRows = Seq[Any](
+      Seq[Any](Float.MaxValue, Float.MinValue, Float.MinPositiveValue),
+      Seq[Any](Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity),
+      Seq[Any](null, -0.0f, 0.0f),
+      Seq.empty[Any],
+      null)
+
+    val doubleRows = Seq[Any](
+      Seq[Any](Double.MaxValue, Double.MinValue, Double.MinPositiveValue),
+      Seq[Any](Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity),
+      Seq[Any](null, -0.0d, 0.0d),
+      Seq.empty[Any],
+      null)
+
+    // Printable ASCII, empty, control, and high-bit (non-UTF-8) byte sequences.
+    val binaryRows = Seq[Any](
+      Seq[Any](bytes(97, 98, 99), Array.empty[Byte]),
+      Seq[Any](bytes(0, 1, 2, 127), bytes(-128, -1)),
+      Seq[Any](null, bytes(0xff, 0xfe), bytes(10, 13)),
+      Seq.empty[Any],
+      null)
+
+    val cases = Seq[(DataType, Seq[Any])](
+      FloatType -> floatRows,
+      DoubleType -> doubleRows,
+      BinaryType -> binaryRows)
+
+    // Run castTest to StringType once per element type: float, double, then binary.
+    cases.foreach { case (elementType, rows) =>
+      val column = StructField("a", ArrayType(elementType), nullable = true)
+      val input = spark.createDataFrame(rows.map(Row(_)).asJava, StructType(Seq(column)))
+      castTest(input, StringType)
+    }
+  }
+
   test("cast ArrayType to ArrayType") {
     val types = Seq(
       BooleanType,