Skip to content

Commit fe31e41

Browse files
manuzhang and codex committed
build: upgrade Spark 4.1 to 4.1.2
Updates the Spark 4.1 Maven profile, Docker image, docs, and Spark SQL test diff to Spark 4.1.2. Co-authored-by: Codex <codex@openai.com>
1 parent e79183e commit fe31e41

11 files changed

Lines changed: 41 additions & 41 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ jobs:
242242
uses: ./.github/workflows/spark_sql_test_reusable.yml
243243
with:
244244
spark-short: '4.1'
245-
spark-full: '4.1.1'
245+
spark-full: '4.1.2'
246246
java: 17
247247

248248
iceberg_1_8:

dev/ci/compute-changes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@
169169
"!spark/src/main/spark-4.2/**",
170170
"!spark/src/main/scala/org/apache/comet/GenerateDocs.scala",
171171
"spark/pom.xml",
172-
"dev/diffs/4.1.1.diff",
172+
"dev/diffs/4.1.2.diff",
173173
"pom.xml",
174174
"rust-toolchain.toml",
175175
".github/workflows/ci.yml",
dev/diffs/4.1.1.diff → dev/diffs/4.1.2.diff

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ index 6df8bc85b51..dabb75e2b75 100644
3939
withSpark(sc) { sc =>
4040
TestUtils.waitUntilExecutorsUp(sc, 2, 60000)
4141
diff --git a/pom.xml b/pom.xml
42-
index dc757d78812..10f7b202e71 100644
42+
index dc201151999..3e278cfb34c 100644
4343
--- a/pom.xml
4444
+++ b/pom.xml
4545
@@ -152,6 +152,8 @@
@@ -78,7 +78,7 @@ index dc757d78812..10f7b202e71 100644
7878
<dependency>
7979
<groupId>org.apache.datasketches</groupId>
8080
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
81-
index d2d07a08aa9..d89f80e5b68 100644
81+
index c25b83c355b..5e23b863dcf 100644
8282
--- a/sql/core/pom.xml
8383
+++ b/sql/core/pom.xml
8484
@@ -97,6 +97,10 @@
@@ -392,7 +392,7 @@ index 0d807aeae4d..6d7744e771b 100644
392392

393393
withTempView("t0", "t1", "t2") {
394394
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
395-
index 0dfd37ebeae..66340218c7c 100644
395+
index bfe15b33768..55c23a38ccc 100644
396396
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
397397
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
398398
@@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.DataTypeErrors.toSQLId
@@ -695,10 +695,10 @@ index e1a2fd33c7c..632f4b695df 100644
695695
}
696696
assert(scanOption.isDefined)
697697
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
698-
index b27122a8de2..3c690dbe788 100644
698+
index 4c62c47971a..ecc7ed21546 100644
699699
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
700700
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
701-
@@ -267,7 +267,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
701+
@@ -268,7 +268,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
702702
}
703703
}
704704

@@ -708,7 +708,7 @@ index b27122a8de2..3c690dbe788 100644
708708
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
709709
withTempView("df") {
710710
val df1 = spark.range(1, 100)
711-
@@ -470,7 +471,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
711+
@@ -471,7 +472,8 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
712712
}
713713
}
714714

@@ -718,7 +718,7 @@ index b27122a8de2..3c690dbe788 100644
718718
withTempDir { dir =>
719719
Seq("parquet", "orc", "csv", "json").foreach { fmt =>
720720
val basePath = dir.getCanonicalPath + "/" + fmt
721-
@@ -548,7 +550,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
721+
@@ -549,7 +551,9 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
722722
}
723723
}
724724

@@ -1276,7 +1276,7 @@ index d7b2511eac2..d5f5b940b94 100644
12761276
val session = classic.SparkSession.builder().sparkContext(sc).getOrCreate()
12771277
import session.implicits._
12781278
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
1279-
index ff0ee19ae97..01958e0c45b 100644
1279+
index 7bfc8cf4fa6..7a425b74184 100644
12801280
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
12811281
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
12821282
@@ -17,6 +17,8 @@
@@ -1437,7 +1437,7 @@ index fee375db10a..8c2c24e2c5f 100644
14371437
val v = VariantBuilder.parseJson(s, false)
14381438
new VariantVal(v.getValue, v.getMetadata)
14391439
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
1440-
index 6cdf681d65c..34a0e3714bd 100644
1440+
index 8f7a68bcbe6..88dbe1793c9 100644
14411441
--- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
14421442
+++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationSuite.scala
14431443
@@ -26,6 +26,8 @@ import org.apache.spark.sql.{AnalysisException, Row}
@@ -1593,7 +1593,7 @@ index 2a0ab21ddb0..6030e7c2b9b 100644
15931593
} finally {
15941594
spark.listenerManager.unregister(listener)
15951595
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
1596-
index 7c830bf6c6e..6d9c643d83e 100644
1596+
index 122c511bf83..9bea26c5225 100644
15971597
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
15981598
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala
15991599
@@ -24,6 +24,8 @@ import org.apache.spark.sql.{DataFrame, Row}
@@ -1606,7 +1606,7 @@ index 7c830bf6c6e..6d9c643d83e 100644
16061606
import org.apache.spark.sql.connector.catalog.functions._
16071607
import org.apache.spark.sql.connector.distributions.Distributions
16081608
@@ -32,7 +34,7 @@ import org.apache.spark.sql.connector.expressions.Expressions._
1609-
import org.apache.spark.sql.execution.SparkPlan
1609+
import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan}
16101610
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
16111611
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
16121612
-import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
@@ -1640,7 +1640,7 @@ index 7c830bf6c6e..6d9c643d83e 100644
16401640
}
16411641

16421642
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
1643-
index 7c4852c5e22..d1a34456bdc 100644
1643+
index ede5d285932..c9a8abb5a94 100644
16441644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
16451645
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala
16461646
@@ -21,7 +21,7 @@ package org.apache.spark.sql.connector
@@ -1649,10 +1649,10 @@ index 7c4852c5e22..d1a34456bdc 100644
16491649

16501650
-import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, Row}
16511651
+import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, IgnoreCometSuite, Row}
1652-
import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal}
1652+
import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal, TransformExpression}
16531653
import org.apache.spark.sql.catalyst.expressions.objects.Invoke
16541654
import org.apache.spark.sql.catalyst.plans.physical
1655-
@@ -45,7 +45,8 @@ import org.apache.spark.sql.util.QueryExecutionListener
1655+
@@ -46,7 +46,8 @@ import org.apache.spark.sql.util.QueryExecutionListener
16561656
import org.apache.spark.tags.SlowSQLTest
16571657

16581658
@SlowSQLTest
@@ -2889,7 +2889,7 @@ index 6b73cc8618d..e67aaeff9df 100644
28892889
case _ => assert(false, "Can not match ParquetTable in the query.")
28902890
}
28912891
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
2892-
index 3072657a095..599d169cf8a 100644
2892+
index 6ba790deddf..34b2f424c8f 100644
28932893
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
28942894
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
28952895
@@ -40,6 +40,7 @@ import org.apache.parquet.schema.{MessageType, MessageTypeParser}
@@ -2900,7 +2900,7 @@ index 3072657a095..599d169cf8a 100644
29002900
import org.apache.spark.sql.catalyst.InternalRow
29012901
import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, UnsafeRow}
29022902
import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils}
2903-
@@ -953,7 +954,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
2903+
@@ -971,7 +972,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
29042904
}
29052905
}
29062906

@@ -2910,7 +2910,7 @@ index 3072657a095..599d169cf8a 100644
29102910
val data = Seq(
29112911
Tuple1((null, null)),
29122912
Tuple1((null, null)),
2913-
@@ -1567,7 +1569,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
2913+
@@ -1585,7 +1587,8 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
29142914
}
29152915
}
29162916

@@ -3274,7 +3274,7 @@ index 38e5b15465b..ca3e8fef27a 100644
32743274

32753275
testWithColumnFamilies("RocksDBStateStore",
32763276
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
3277-
index e839ccd35ec..d182aa07b44 100644
3277+
index 232332a6575..324afe9ebb7 100644
32783278
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
32793279
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
32803280
@@ -32,7 +32,8 @@ import org.apache.hadoop.conf.Configuration
@@ -3639,7 +3639,7 @@ index 465da3cd469..92ac998929d 100644
36393639

36403640
val aggregateExecsWithoutPartialAgg = allAggregateExecs.filter {
36413641
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
3642-
index 22028a585e2..20c6b7c796a 100644
3642+
index 6cdca9fb530..6542bc8dced 100644
36433643
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
36443644
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala
36453645
@@ -34,7 +34,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
@@ -3819,10 +3819,10 @@ index f0f3f94b811..b7d18771314 100644
38193819

38203820
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
38213821
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
3822-
index 245219c1756..b566f970ccd 100644
3822+
index 720b13b812e..71b20c79a12 100644
38233823
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
38243824
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
3825-
@@ -75,6 +75,21 @@ trait SharedSparkSessionBase
3825+
@@ -98,6 +98,21 @@ trait SharedSparkSessionBase
38263826
// this rule may potentially block testing of other optimization rules such as
38273827
// ConstantPropagation etc.
38283828
.set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)

docs/source/contributor-guide/benchmarking_macos.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,13 @@ export DF_BENCH=`pwd`
5555

5656
## Install Spark
5757

58-
Install Apache Spark. This example refers to 4.1.1 version.
58+
Install Apache Spark. This example refers to 4.1.2 version.
5959

6060
```shell
61-
wget https://archive.apache.org/dist/spark/spark-4.1.1/spark-4.1.1-bin-hadoop3.tgz
62-
tar xzf spark-4.1.1-bin-hadoop3.tgz
63-
sudo mv spark-4.1.1-bin-hadoop3 /opt
64-
export SPARK_HOME=/opt/spark-4.1.1-bin-hadoop3/
61+
wget https://archive.apache.org/dist/spark/spark-4.1.2/spark-4.1.2-bin-hadoop3.tgz
62+
tar xzf spark-4.1.2-bin-hadoop3.tgz
63+
sudo mv spark-4.1.2-bin-hadoop3 /opt
64+
export SPARK_HOME=/opt/spark-4.1.2-bin-hadoop3/
6565
```
6666

6767
Start Spark in standalone mode:

docs/source/user-guide/latest/compatibility/spark-versions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ Spark 4.0.2 is supported with Java 17 and Scala 2.13.
6666

6767
## Spark 4.1
6868

69-
Spark 4.1.1 is supported with Java 17/21 and Scala 2.13.
69+
Spark 4.1.2 is supported with Java 17/21 and Scala 2.13.
7070

7171
### Known Limitations
7272

docs/source/user-guide/latest/installation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ Other versions may work well enough for development and evaluation purposes.
5050
| 3.4.3 | 11/17 | 2.12/2.13 | Yes | Yes |
5151
| 3.5.8 | 11/17 | 2.12/2.13 | Yes | Yes |
5252
| 4.0.2 | 17/21 | 2.13 | Yes | Yes |
53-
| 4.1.1 | 17/21 | 2.13 | Yes | Yes |
53+
| 4.1.2 | 17/21 | 2.13 | Yes | Yes |
5454

5555
Note that we do not test the full matrix of supported Java and Scala versions in CI for every Spark version.
5656

docs/source/user-guide/latest/kubernetes.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ spec:
7272
image: apache/datafusion-comet:$COMET_VERSION-spark3.5.5-scala2.12-java11
7373
imagePullPolicy: IfNotPresent
7474
mainClass: org.apache.spark.examples.SparkPi
75-
mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.1.jar
75+
mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.13-4.1.2.jar
7676
sparkConf:
7777
"spark.executor.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar"
7878
"spark.driver.extraClassPath": "/opt/spark/jars/comet-spark-spark3.5_2.12-$COMET_VERSION.jar"
@@ -82,17 +82,17 @@ spec:
8282
"spark.comet.exec.shuffle.enabled": "true"
8383
"spark.comet.exec.shuffle.mode": "auto"
8484
"spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager"
85-
sparkVersion: 4.1.1
85+
sparkVersion: 4.1.2
8686
driver:
8787
labels:
88-
version: 4.1.1
88+
version: 4.1.2
8989
cores: 1
9090
coreLimit: 1200m
9191
memory: 512m
9292
serviceAccount: spark-operator-spark
9393
executor:
9494
labels:
95-
version: 4.1.1
95+
version: 4.1.2
9696
instances: 1
9797
cores: 1
9898
coreLimit: 1200m

kube/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# limitations under the License.
1616
#
1717

18-
FROM apache/spark:4.1.1 AS builder
18+
FROM apache/spark:4.1.2 AS builder
1919

2020
USER root
2121

@@ -69,7 +69,7 @@ RUN mkdir -p /root/.m2 && \
6969
RUN cd /comet \
7070
&& JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
7171

72-
FROM apache/spark:4.1.1
72+
FROM apache/spark:4.1.2
7373
ENV SPARK_VERSION=4.1
7474
ENV SCALA_VERSION=2.13
7575
USER root

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ under the License.
6969
<scala.plugin.version>4.9.6</scala.plugin.version>
7070
<scalatest.version>3.2.16</scalatest.version>
7171
<scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version>
72-
<spark.version>4.1.1</spark.version>
72+
<spark.version>4.1.2</spark.version>
7373
<spark.version.short>4.1</spark.version.short>
7474
<spark.maven.scope>provided</spark.maven.scope>
7575
<protobuf.version>3.25.5</protobuf.version>
@@ -704,13 +704,13 @@ under the License.
704704
<profile>
705705
<id>spark-4.1</id>
706706
<properties>
707-
<!-- Spark 4.1.1 is compiled against Scala 2.13.17 and emits calls into stdlib methods
707+
<!-- Spark 4.1 is compiled against Scala 2.13.17 and emits calls into stdlib methods
708708
added in that release (e.g. MurmurHash3$.caseClassHash$default$2()). Comet must
709709
match to avoid runtime NoSuchMethodError. Note: semanticdb-scalac_2.13.17 is not
710710
yet published, so the -Psemanticdb / scalafix lint job is skipped for spark-4.1. -->
711711
<scala.version>2.13.17</scala.version>
712712
<scala.binary.version>2.13</scala.binary.version>
713-
<spark.version>4.1.1</spark.version>
713+
<spark.version>4.1.2</spark.version>
714714
<spark.version.short>4.1</spark.version.short>
715715
<parquet.version>1.16.0</parquet.version>
716716
<semanticdb.version>4.13.6</semanticdb.version>

spark/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ under the License.
291291
<version>1.10.0</version>
292292
<scope>test</scope>
293293
</dependency>
294-
<!-- Jetty 11.x for Spark 4.1 (jakarta.servlet); matches Spark 4.1.1's jetty.version -->
294+
<!-- Jetty 11.x for Spark 4.1 (jakarta.servlet); matches Spark 4.1.2's jetty.version -->
295295
<dependency>
296296
<groupId>org.eclipse.jetty</groupId>
297297
<artifactId>jetty-server</artifactId>

0 commit comments

Comments
 (0)