Skip to content

Commit 0a91148

Browse files
authored
[Build] Pin Spark 4.2 lane to 4.2.0-preview5 (#6870)
## 🥞 Stacked PR

Use this [link](https://github.com/delta-io/delta/pull/6870/files) to review incremental changes.

- [**stack/spark42-preview**](#6870) [[Files changed](https://github.com/delta-io/delta/pull/6870/files)]
- [stack/bump_uc_sha](#6866) [[Files changed](https://github.com/delta-io/delta/pull/6866/files/046318c601ad27e6edbb069fba558bff95b047e5..11a3e61ca79e90de45ee2252c2cefc06d6ec5ebb)]
- [stack/DeltaCatalogClient_create](#6826) [[Files changed](https://github.com/delta-io/delta/pull/6826/files/11a3e61ca79e90de45ee2252c2cefc06d6ec5ebb..8a63fe4049beb49bf93f4d1b3a308b56ca5a273f)]
- [stack/DeltaCatalogClient_replace](#6859) [[Files changed](https://github.com/delta-io/delta/pull/6859/files/8a63fe4049beb49bf93f4d1b3a308b56ca5a273f..0c5b785015d413cd9c026cd1091cf7f3d457f5f6)]

---------

#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

[Build] Pin Spark 4.2 lane to 4.2.0-preview5

- `CrossSparkVersions.scala`: `4.2.0-SNAPSHOT` -> `4.2.0-preview5`; drop jitpack.
- `get_spark_version_info.py`: `--released-spark-versions` explicitly excludes `-SNAPSHOT` / `-preview` markers.
- `test_cross_spark_publish.py`: bump pinned version; widen version regex to accept `-previewN`; mirror the explicit pre-release marker check.
- `spark_test.yaml`: comment "snapshot" -> "preview".
- `DeltaChangelogTestBase.java`: widen the existing SNAPSHOT-only skip to also skip `-preview` (known `DeltaCatalog`/`AbstractDeltaCatalog` classpath bug in sparkV2).
- `DeltaParquetFormatVersionSuite.scala`: widen the SPARK-56414 SNAPSHOT-only `assume` to also skip `-preview` (Delta's `DeltaFileFormatWriter` does not yet merge per-write options).

## How was this patch tested?

CI

## Does this PR introduce _any_ user-facing changes?

No

Signed-off-by: Yi Li <yi.li@databricks.com>
1 parent a8f50d6 commit 0a91148

6 files changed

Lines changed: 37 additions & 25 deletions

File tree

.github/workflows/spark_test.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ jobs:
3737
name: "DS: Spark ${{ matrix.spark_version }}, Scala ${{ matrix.scala }}, Shard ${{ matrix.shard }}"
3838
runs-on: ubuntu-24.04
3939
needs: generate-matrix
40-
# Spark 4.2 is a branch snapshot until the Spark 4.2 release is published.
41-
# Do not block PRs on its failures while the snapshot lane is stabilizing.
40+
# Spark 4.2 is a preview release until the Spark 4.2 release is published.
41+
# Do not block PRs on its failures while the preview lane is stabilizing.
4242
continue-on-error: ${{ matrix.spark_version == '4.2' }}
4343
strategy:
4444
fail-fast: false

project/CrossSparkVersions.scala

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -286,18 +286,15 @@ object SparkVersionSpec {
286286
jacksonVersion = "2.18.2"
287287
)
288288

289-
private val spark42Snapshot = SparkVersionSpec(
290-
fullVersion = "4.2.0-SNAPSHOT",
289+
private val spark42Preview = SparkVersionSpec(
290+
fullVersion = "4.2.0-preview5",
291291
targetJvm = "17",
292292
additionalSourceDir = Some("scala-shims/spark-4.2"),
293293
supportIceberg = false,
294294
supportHudi = false,
295295
antlr4Version = "4.13.1",
296296
additionalJavaOptions = java17TestSettings,
297-
jacksonVersion = "2.18.2",
298-
// Artifact updates in maven central for roaringbitmap stopped after 1.3.0.
299-
// Spark branch-4.2 uses 1.5.3. Relevant Spark PR here https://github.com/apache/spark/pull/52892
300-
additionalResolvers = Seq("jitpack" at "https://jitpack.io")
297+
jacksonVersion = "2.18.2"
301298
)
302299

303300
/** Default Spark version */
@@ -307,7 +304,7 @@ object SparkVersionSpec {
307304
val MASTER: Option[SparkVersionSpec] = None
308305

309306
/** All supported Spark versions - internal use only */
310-
val ALL_SPECS = Seq(spark40, spark41, spark42Snapshot)
307+
val ALL_SPECS = Seq(spark40, spark41, spark42Preview)
311308
}
312309

313310
/** See docs on top of this file */

project/scripts/get_spark_version_info.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,12 @@ def main():
111111
print(json.dumps(matrix_versions))
112112

113113
elif args.released_spark_versions:
114-
# Only include released versions (no -SNAPSHOT in fullVersion)
114+
# Only include released versions; explicitly exclude pre-release markers
115+
# (`-SNAPSHOT`, `-previewN`).
116+
pre_release_markers = ("-SNAPSHOT", "-preview")
115117
matrix_versions = []
116118
for v in versions:
117-
if "-SNAPSHOT" not in v["fullVersion"]:
119+
if not any(m in v["fullVersion"] for m in pre_release_markers):
118120
matrix_versions.append(v["shortVersion"])
119121
print(json.dumps(matrix_versions))
120122

project/tests/test_cross_spark_publish.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def all_jars(self) -> List[str]:
108108
SPARK_VERSIONS: Dict[str, SparkVersionSpec] = {
109109
"4.0.1": SparkVersionSpec(suffix="_4.0", support_iceberg=True, support_hudi=True),
110110
"4.1.0": SparkVersionSpec(suffix="_4.1", support_iceberg=False, support_hudi=False),
111-
"4.2.0-SNAPSHOT": SparkVersionSpec(suffix="_4.2", support_iceberg=False, support_hudi=False)
111+
"4.2.0-preview5": SparkVersionSpec(suffix="_4.2", support_iceberg=False, support_hudi=False)
112112
}
113113

114114
# The default Spark version
@@ -320,9 +320,9 @@ def validate_spark_versions(self) -> None:
320320
)
321321

322322
# Parse output - each line is a Spark version
323-
# Version format: X.Y.Z or X.Y.Z-SNAPSHOT
323+
# Version format: X.Y.Z, X.Y.Z-SNAPSHOT, X.Y.Z-previewN
324324
import re
325-
version_pattern = re.compile(r'^\d+\.\d+\.\d+(-SNAPSHOT)?$')
325+
version_pattern = re.compile(r'^\d+\.\d+\.\d+(-(SNAPSHOT|preview\d+))?$')
326326

327327
build_versions = set()
328328
for line in result.stdout.strip().split('\n'):
@@ -469,7 +469,7 @@ def test_all_spark_versions(self) -> bool:
469469
return False
470470

471471
def test_released_spark_versions(self) -> bool:
472-
"""Test that --released-spark-versions excludes snapshots."""
472+
"""Test that --released-spark-versions excludes snapshots and pre-releases."""
473473
if not self.ensure_json_exists():
474474
return False
475475

@@ -493,22 +493,30 @@ def test_released_spark_versions(self) -> bool:
493493
print(" ✗ All entries must be strings")
494494
return False
495495

496-
# Load JSON and verify snapshots are excluded
496+
# Load JSON and verify pre-release versions are excluded
497+
# (`-SNAPSHOT`, `-previewN`).
498+
pre_release_markers = ("-SNAPSHOT", "-preview")
499+
500+
def is_pre_release(full_version: str) -> bool:
501+
return any(m in full_version for m in pre_release_markers)
502+
497503
with open(self.json_path, 'r') as f:
498504
data = json.load(f)
499505

500-
expected_count = sum(1 for entry in data if "-SNAPSHOT" not in entry["fullVersion"])
506+
expected_count = sum(1 for entry in data if not is_pre_release(entry["fullVersion"]))
501507
if len(released_versions) != expected_count:
502508
print(f" ✗ Expected {expected_count} released versions, got {len(released_versions)}")
503509
return False
504510

505-
# Verify no snapshot versions included
511+
# Verify no pre-release versions included (rough check via shortVersion lookup)
512+
full_versions_by_short = {entry["shortVersion"]: entry["fullVersion"] for entry in data}
506513
for version in released_versions:
507-
if "SNAPSHOT" in version.upper():
508-
print(f" ✗ Released versions should not include snapshots: {version}")
514+
full = full_versions_by_short.get(version, "")
515+
if is_pre_release(full):
516+
print(f" ✗ Released versions should not include pre-releases: {version} ({full})")
509517
return False
510518

511-
print(f" ✓ --released-spark-versions: {released_versions} (snapshots excluded)")
519+
print(f" ✓ --released-spark-versions: {released_versions} (snapshots and previews excluded)")
512520
return True
513521

514522
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:

spark/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFormatVersionSuite.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,14 @@ class DeltaParquetFormatVersionSuite
119119
* Guard for tests that need SPARK-56414 behavior (per-write options overriding session conf
120120
* in Parquet writes). SPARK-56414 is merged into Spark 4.2, but Delta's DeltaFileFormatWriter
121121
* (a fork of FileFormatWriter) does not yet call mergeWriteOptionsIntoHadoopConf. Skip on
122-
* SNAPSHOT until DeltaFileFormatWriter is updated.
122+
* Spark pre-release builds (SNAPSHOT, preview) until DeltaFileFormatWriter is updated.
123123
*/
124124
private def assumeSpark56414Available(): Unit = {
125-
assume(spark.version >= "4.2" && !spark.version.contains("SNAPSHOT"),
125+
val sparkVersion = spark.version
126+
assume(
127+
sparkVersion >= "4.2" &&
128+
!sparkVersion.contains("SNAPSHOT") &&
129+
!sparkVersion.contains("preview"),
126130
"DeltaFileFormatWriter does not yet merge per-write options into Hadoop conf (SPARK-56414)")
127131
}
128132

spark/v2/src/test/scala-shims/spark-4.2/io/delta/spark/internal/v2/read/changelog/DeltaChangelogTestBase.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ public abstract class DeltaChangelogTestBase extends DeltaV2TestBase {
4040

4141
@BeforeAll
4242
public static void setUpChangelogSparkAndEngine() {
43+
String sparkVersion = org.apache.spark.package$.MODULE$.SPARK_VERSION();
4344
org.junit.jupiter.api.Assumptions.assumeFalse(
44-
org.apache.spark.package$.MODULE$.SPARK_VERSION().contains("SNAPSHOT"),
45-
"Changelog tests are temporarily skipped on Spark SNAPSHOT builds"
45+
sparkVersion.contains("SNAPSHOT") || sparkVersion.contains("preview"),
46+
"Changelog tests are temporarily skipped on Spark pre-release builds (SNAPSHOT/preview)"
4647
+ " due to a DeltaCatalog/AbstractDeltaCatalog classpath issue in sparkV2");
4748
if (spark != null) {
4849
spark.stop();

0 commit comments

Comments
 (0)