build: upgrade Spark 4.1 to 4.1.2

manuzhang · codex · manuzhang · commit fe31e419bf59 · 2026-05-30T23:40:25.000+08:00
Updates the Spark 4.1 Maven profile, Docker image, docs, and Spark SQL test diff to Spark 4.1.2.

Co-authored-by: Codex <codex@openai.com>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -242,7 +242,7 @@ jobs:
     uses: ./.github/workflows/spark_sql_test_reusable.yml
     with:
       spark-short: '4.1'
-      spark-full: '4.1.1'
+      spark-full: '4.1.2'
       java: 17
 
   iceberg_1_8:
diff --git a/dev/ci/compute-changes.py b/dev/ci/compute-changes.py
@@ -169,7 +169,7 @@
         "!spark/src/main/spark-4.2/**",
         "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala",
         "spark/pom.xml",
-        "dev/diffs/4.1.1.diff",
+        "dev/diffs/4.1.2.diff",
         "pom.xml",
         "rust-toolchain.toml",
         ".github/workflows/ci.yml",
diff --git a/dev/diffs/4.1.2.diff b/dev/diffs/4.1.2.diff
@@ -39,7 +39,7 @@ index 6df8bc85b51..dabb75e2b75 100644
        withSpark(sc) { sc =>
          TestUtils.waitUntilExecutorsUp(sc, 2, 60000)
 diff --git a/pom.xml b/pom.xml
-index dc757d78812..10f7b202e71 100644
+index dc201151999..3e278cfb34c 100644
 --- a/pom.xml
 +++ b/pom.xml
 @@ -152,6 +152,8 @@
@@ -78,7 +78,7 @@ index dc757d78812..10f7b202e71 100644
        <dependency>
          <groupId>org.apache.datasketches</groupId>
 diff --git a/sql/core/pom.xml b/sql/core/pom.xml
-index d2d07a08aa9..d89f80e5b68 100644
+index c25b83c355b..5e23b863dcf 100644
 --- a/sql/core/pom.xml
 +++ b/sql/core/pom.xml
 @@ -97,6 +97,10 @@
@@ -392,7 +392,7 @@ index 0d807aeae4d..6d7744e771b 100644
  
        withTempView("t0", "t1", "t2") {
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
-index 0dfd37ebeae..66340218c7c 100644
+index bfe15b33768..55c23a38ccc 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
 @@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.DataTypeErrors.toSQLId
@@ -695,10 +695,10 @@ index e1a2fd33c7c..632f4b695df 100644
              }
            assert(scanOption.isDefined)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
-index b27122a8de2..3c690dbe788 100644
+index 4c62c47971a..ecc7ed21546 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
-@@ -267,7 +267,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+@@ -268,7 +268,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
      }
    }
  
@@ -708,7 +708,7 @@ index b27122a8de2..3c690dbe788 100644
      withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
        withTempView("df") {
          val df1 = spark.range(1, 100)
-@@ -470,7 +471,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+@@ -471,7 +472,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
      }
    }
  
@@ -718,7 +718,7 @@ index b27122a8de2..3c690dbe788 100644
      withTempDir { dir =>
        Seq("parquet", "orc", "csv", "json").foreach { fmt =>
          val basePath = dir.getCanonicalPath + "/" + fmt
-@@ -548,7 +550,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
+@@ -549,7 +551,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
    }
  }
  
@@ -1276,7 +1276,7 @@ index d7b2511eac2..d5f5b940b94 100644
      val session = classic.SparkSession.builder().sparkContext(sc).getOrCreate()
      import session.implicits._
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
-index ff0ee19ae97..01958e0c45b 100644
+index 7bfc8cf4fa6..7a425b74184 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
 @@ -17,6 +17,8 @@
@@ -1437,7 +1437,7 @@ index fee375db10a..8c2c24e2c5f 100644
      val v = VariantBuilder.parseJson(s, false)
      new VariantVal(v.getValue, v.getMetadata)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
-index 6cdf681d65c..34a0e3714bd 100644
+index 8f7a68bcbe6..88dbe1793c9 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
 @@ -26,6 +26,8 @@ import org.apache.spark.sql.{AnalysisException, Row}
@@ -1593,7 +1593,7 @@ index 2a0ab21ddb0..6030e7c2b9b 100644
          } finally {
            spark.listenerManager.unregister(listener)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
-index 7c830bf6c6e..6d9c643d83e 100644
+index 122c511bf83..9bea26c5225 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
 @@ -24,6 +24,8 @@ import org.apache.spark.sql.{DataFrame, Row}
@@ -1606,7 +1606,7 @@ index 7c830bf6c6e..6d9c643d83e 100644
  import org.apache.spark.sql.connector.catalog.functions._
  import org.apache.spark.sql.connector.distributions.Distributions
 @@ -32,7 +34,7 @@ import org.apache.spark.sql.connector.expressions.Expressions._
- import org.apache.spark.sql.execution.SparkPlan
+ import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan}
  import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
  import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
 -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
@@ -1640,7 +1640,7 @@ index 7c830bf6c6e..6d9c643d83e 100644
    }
  
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
-index 7c4852c5e22..d1a34456bdc 100644
+index ede5d285932..c9a8abb5a94 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
 @@ -21,7 +21,7 @@ package org.apache.spark.sql.connector
@@ -1649,10 +1649,10 @@ index 7c4852c5e22..d1a34456bdc 100644
  
 -import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, Row}
 +import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, IgnoreCometSuite, Row}
- import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal}
+ import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal, TransformExpression}
  import org.apache.spark.sql.catalyst.expressions.objects.Invoke
  import org.apache.spark.sql.catalyst.plans.physical
-@@ -45,7 +45,8 @@ import org.apache.spark.sql.util.QueryExecutionListener
+@@ -46,7 +46,8 @@ import org.apache.spark.sql.util.QueryExecutionListener
  import org.apache.spark.tags.SlowSQLTest
  
  @SlowSQLTest
@@ -2889,7 +2889,7 @@ index 6b73cc8618d..e67aaeff9df 100644
          case _ => assert(false, "Can not match ParquetTable in the query.")
        }
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
-index 3072657a095..599d169cf8a 100644
+index 6ba790deddf..34b2f424c8f 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
 @@ -40,6 +40,7 @@ import org.apache.parquet.schema.{MessageType, MessageTypeParser}
@@ -2900,7 +2900,7 @@ index 3072657a095..599d169cf8a 100644
  import org.apache.spark.sql.catalyst.InternalRow
  import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, UnsafeRow}
  import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils}
-@@ -953,7 +954,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+@@ -971,7 +972,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
      }
    }
  
@@ -2910,7 +2910,7 @@ index 3072657a095..599d169cf8a 100644
      val data = Seq(
        Tuple1((null, null)),
        Tuple1((null, null)),
-@@ -1567,7 +1569,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
+@@ -1585,7 +1587,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
      }
    }
  
@@ -3274,7 +3274,7 @@ index 38e5b15465b..ca3e8fef27a 100644
  
    testWithColumnFamilies("RocksDBStateStore",
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
-index e839ccd35ec..d182aa07b44 100644
+index 232332a6575..324afe9ebb7 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
 @@ -32,7 +32,8 @@ import org.apache.hadoop.conf.Configuration
@@ -3639,7 +3639,7 @@ index 465da3cd469..92ac998929d 100644
  
          val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter {
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
-index 22028a585e2..20c6b7c796a 100644
+index 6cdca9fb530..6542bc8dced 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
 @@ -34,7 +34,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
@@ -3819,10 +3819,10 @@ index f0f3f94b811..b7d18771314 100644
  
      spark.internalCreateDataFrame(withoutFilters.execute(), schema)
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-index 245219c1756..b566f970ccd 100644
+index 720b13b812e..71b20c79a12 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -75,6 +75,21 @@ trait SharedSparkSessionBase
+@@ -98,6 +98,21 @@ trait SharedSparkSessionBase
        // this rule may potentially block testing of other optimization rules such as
        // ConstantPropagation etc.
        .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
diff --git a/docs/source/contributor-guide/benchmarking_macos.md b/docs/source/contributor-guide/benchmarking_macos.md
@@ -55,13 +55,13 @@ export DF_BENCH=`pwd`
 
 ## Install Spark
 
-Install Apache Spark. This example refers to 4.1.1 version.
+Install Apache Spark. This example uses version 4.1.2.
 
 ```shell
-wget https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz
-tar xzf spark-4.1.1-bin-hadoop3.tgz
-sudo mv spark-4.1.1-bin-hadoop3 /opt
-export SPARK_HOME=/opt/spark-4.1.1-bin-hadoop3/
+wget https://archive.apache.org/dist/spark/spark-4.1.2/spark-4.1.2-bin-hadoop3.tgz
+tar xzf spark-4.1.2-bin-hadoop3.tgz
+sudo mv spark-4.1.2-bin-hadoop3 /opt
+export SPARK_HOME=/opt/spark-4.1.2-bin-hadoop3/
 ```
 
 Start Spark in standalone mode:
diff --git a/docs/source/user-guide/latest/compatibility/spark-versions.md b/docs/source/user-guide/latest/compatibility/spark-versions.md
@@ -66,7 +66,7 @@ Spark 4.0.2 is supported with Java 17 and Scala 2.13.
 
 ## Spark 4.1
 
-Spark 4.1.1 is supported with Java 17/21 and Scala 2.13.
+Spark 4.1.2 is supported with Java 17/21 and Scala 2.13.
 
 ### Known Limitations
 
diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md
@@ -50,7 +50,7 @@ Other versions may work well enough for development and evaluation purposes.
 | 3.4.3         | 11/17        | 2.12/2.13     | Yes               | Yes                   |
 | 3.5.8         | 11/17        | 2.12/2.13     | Yes               | Yes                   |
 | 4.0.2         | 17/21        | 2.13          | Yes               | Yes                   |
-| 4.1.1         | 17/21        | 2.13          | Yes               | Yes                   |
+| 4.1.2         | 17/21        | 2.13          | Yes               | Yes                   |
 
 Note that we do not test the full matrix of supported Java and Scala versions in CI for every Spark version.
 
diff --git a/docs/source/user-guide/latest/kubernetes.md b/docs/source/user-guide/latest/kubernetes.md
@@ -72,7 +72,7 @@ spec:
   image: apache/datafusion-comet:$COMET_VERSION-spark3.5.5-scala2.12-java11
   imagePullPolicy: IfNotPresent
   mainClass: org.apache.spark.examples.SparkPi
-  mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.1.jar
+  mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.2.jar
   sparkConf:
     "spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar"
     "spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar"
@@ -82,17 +82,17 @@ spec:
     "spark.comet.exec.shuffle.enabled": "true"
     "spark.comet.exec.shuffle.mode": "auto"
     "spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager"
-  sparkVersion: 4.1.1
+  sparkVersion: 4.1.2
   driver:
     labels:
-      version: 4.1.1
+      version: 4.1.2
     cores: 1
     coreLimit: 1200m
     memory: 512m
     serviceAccount: spark-operator-spark
   executor:
     labels:
-      version: 4.1.1
+      version: 4.1.2
     instances: 1
     cores: 1
     coreLimit: 1200m
diff --git a/kube/Dockerfile b/kube/Dockerfile
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-FROM apache/spark:4.1.1 AS builder
+FROM apache/spark:4.1.2 AS builder
 
 USER root
 
@@ -69,7 +69,7 @@ RUN mkdir -p /root/.m2 && \
 RUN cd /comet \
     && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
 
-FROM apache/spark:4.1.1
+FROM apache/spark:4.1.2
 ENV SPARK_VERSION=4.1
 ENV SCALA_VERSION=2.13
 USER root
diff --git a/pom.xml b/pom.xml
@@ -69,7 +69,7 @@ under the License.
     <scala.plugin.version>4.9.6</scala.plugin.version>
     <scalatest.version>3.2.16</scalatest.version>
     <scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version>
-    <spark.version>4.1.1</spark.version>
+    <spark.version>4.1.2</spark.version>
     <spark.version.short>4.1</spark.version.short>
     <spark.maven.scope>provided</spark.maven.scope>
     <protobuf.version>3.25.5</protobuf.version>
@@ -704,13 +704,13 @@ under the License.
     <profile>
       <id>spark-4.1</id>
       <properties>
-        <!-- Spark 4.1.1 is compiled against Scala 2.13.17 and emits calls into stdlib methods
+        <!-- Spark 4.1 is compiled against Scala 2.13.17 and emits calls into stdlib methods
              added in that release (e.g. MurmurHash3$.caseClassHash$default$2()). Comet must
              match to avoid runtime NoSuchMethodError. Note: semanticdb-scalac_2.13.17 is not
              yet published, so the -Psemanticdb / scalafix lint job is skipped for spark-4.1. -->
         <scala.version>2.13.17</scala.version>
         <scala.binary.version>2.13</scala.binary.version>
-        <spark.version>4.1.1</spark.version>
+        <spark.version>4.1.2</spark.version>
         <spark.version.short>4.1</spark.version.short>
         <parquet.version>1.16.0</parquet.version>
         <semanticdb.version>4.13.6</semanticdb.version>
diff --git a/spark/pom.xml b/spark/pom.xml
@@ -291,7 +291,7 @@ under the License.
           <version>1.10.0</version>
           <scope>test</scope>
         </dependency>
-        <!-- Jetty 11.x for Spark 4.1 (jakarta.servlet); matches Spark 4.1.1's jetty.version -->
+        <!-- Jetty 11.x for Spark 4.1 (jakarta.servlet); matches Spark 4.1.2's jetty.version -->
         <dependency>
           <groupId>org.eclipse.jetty</groupId>
           <artifactId>jetty-server</artifactId>
diff --git a/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala b/spark/src/test/spark-4.1/org/apache/spark/sql/comet/CometDecimalArithmeticViewSuite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.types.DecimalType
 
 class CometDecimalArithmeticViewSuite extends CometTestBase {
 
-  // Spark 4.1.1 (SPARK-53968) stores `spark.sql.decimalOperations.allowPrecisionLoss` per
+  // Spark 4.1+ (SPARK-53968) stores `spark.sql.decimalOperations.allowPrecisionLoss` per
   // arithmetic expression so a view's analyzed plan keeps a stable result type across config
   // changes. Comet's DecimalPrecision rule used to recompute the result type from the current
   // SQLConf, producing a CheckOverflow target that disagreed with the stored Add.dataType and