Skip to content

Commit a81610d

Browse files
committed
Bump UC sha to af090e7 and use the new UC-Spark package name
Signed-off-by: Yi Li <yi.li@databricks.com>
1 parent 81a7b99 commit a81610d

5 files changed

Lines changed: 90 additions & 21 deletions

File tree

.github/actions/setup-unitycatalog/action.yml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ name: "Set up pinned Unity Catalog build"
22
description: >-
33
Publishes Unity Catalog jars from the commit pinned in project/scripts/setup_unitycatalog_main.sh
44
(the UC_PIN_SHA= line) to the runner's local Ivy / Maven caches, using GitHub Actions cache so the
5-
slow UC build only runs the first time a pin is seen.
5+
slow UC build only runs the first time a pin is seen. Reads SPARK_VERSION (Spark major.minor
6+
short form) from the caller's env (job-level or step-level `env:`) to pick the Spark variant UC builds against; matrix
7+
workflows set this from `matrix.spark_version`, other workflows leave it unset and inherit the
8+
script's default. The cache key reflects SPARK_VERSION literally - workflows that share the
9+
default share one cache entry under an empty `spark-` segment.
610
711
runs:
812
using: "composite"
@@ -15,9 +19,9 @@ runs:
1519
path: |
1620
~/.ivy2/local
1721
~/.m2/repository/io/unitycatalog
18-
# Cache key hashes the setup script, so bumping UC_PIN_SHA (or any other script change)
19-
# invalidates the cache.
20-
key: uc-jars-${{ runner.os }}-${{ hashFiles('project/scripts/setup_unitycatalog_main.sh') }}
22+
# Cache key hashes the setup script (so any script change invalidates) and includes the
23+
# Spark short version (so 4.0 and 4.1 don't fight over a single shared cache entry).
24+
key: uc-jars-${{ runner.os }}-spark${{ env.SPARK_VERSION }}-${{ hashFiles('project/scripts/setup_unitycatalog_main.sh') }}
2125
- name: Build Unity Catalog from pinned SHA
2226
shell: bash
2327
run: bash project/scripts/setup_unitycatalog_main.sh
@@ -33,4 +37,4 @@ runs:
3337
path: |
3438
~/.ivy2/local
3539
~/.m2/repository/io/unitycatalog
36-
key: uc-jars-${{ runner.os }}-${{ hashFiles('project/scripts/setup_unitycatalog_main.sh') }}
40+
key: uc-jars-${{ runner.os }}-spark${{ env.SPARK_VERSION }}-${{ hashFiles('project/scripts/setup_unitycatalog_main.sh') }}

.github/workflows/build.yaml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,20 @@ jobs:
3838
key: delta-sbt-cache-cross-spark
3939

4040
# publishM2 compiles every aggregated project, including storage, which has
41-
# unitycatalog-client as a compile-scope dependency. Publish the pinned UC build locally
42-
# first so Delta compiles against the UC APIs it actually targets.
43-
- name: Set up pinned Unity Catalog
41+
# unitycatalog-client as a compile-scope dependency. test_cross_spark_publish.py also
42+
# iterates over released Spark versions (sbt -DsparkVersion=<X.Y>), so we need UC's
43+
# spark connector published for each variant Delta will resolve. Invoke the setup action
44+
# once per Spark variant; the action's cache key partitions by SPARK_VERSION so each
45+
# variant warms its own cache entry. Keep this list in sync with the released versions
46+
# in project/spark-versions.json (snapshot versions are skipped by the cross-Spark test).
47+
- name: Set up pinned Unity Catalog (Spark 4.0)
4448
uses: ./.github/actions/setup-unitycatalog
49+
env:
50+
SPARK_VERSION: "4.0"
51+
- name: Set up pinned Unity Catalog (Spark 4.1)
52+
uses: ./.github/actions/setup-unitycatalog
53+
env:
54+
SPARK_VERSION: "4.1"
4555

4656
- name: Run cross-Spark build test
4757
run: python project/tests/test_cross_spark_publish.py

.github/workflows/spark_examples_test.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ jobs:
4545
scala: [2.13.17]
4646
env:
4747
SCALA_VERSION: ${{ matrix.scala }}
48+
# Short Spark version (e.g. "4.0"). The UC setup action reads this from job env to pick the
49+
# variant of UC to build. Per-step `env:` blocks below override this with the full version
50+
# for Delta and examples sbt invocations that want "4.0.1"-style values.
51+
SPARK_VERSION: ${{ matrix.spark_version }}
4852
steps:
4953
- uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0
5054
- name: Get Spark version details

build.sbt

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,12 @@ val unityCatalogVersion: String = sys.props.getOrElse(
786786
if (useDefaultUnityCatalogReleaseVersion) defaultUnityCatalogReleaseVersion
787787
else unityCatalogReleaseVersion.getOrElse(pinnedUnityCatalogVersion))
788788

789+
// UC publishes its Spark connector per Spark major.minor (e.g. unitycatalog-spark_4.1). This
790+
// is the artifact name without the Scala suffix - sbt's `%%` appends `_2.13` for dep
791+
// resolution; the canary check below appends `_2.13` explicitly for the Ivy/Maven path.
792+
val unityCatalogSparkArtifactName: String =
793+
s"unitycatalog-spark_${CrossSparkVersions.getSparkVersionSpec().shortVersion}"
794+
789795
/**
790796
* Returns true when `current` is at least `target`. Numeric segments only; suffix after
791797
* the first `-` (e.g. `-SNAPSHOT-abc1234`) is stripped before comparison.
@@ -831,7 +837,12 @@ def publishPinnedUnityCatalog(log: sbt.util.Logger, canary: java.io.File): Unit
831837
val procLogger = ProcessLogger(
832838
line => log.info(s"[UC setup] $line"),
833839
line => log.warn(s"[UC setup] $line"))
834-
val exit = Process(Seq("bash", unityCatalogSetupScript)).!(procLogger)
840+
// SPARK_VERSION tells the script which Spark variant to build (forwarded to UC's sbt as
841+
// -DsparkVersion).
842+
val exit = Process(
843+
Seq("bash", unityCatalogSetupScript),
844+
None,
845+
"SPARK_VERSION" -> CrossSparkVersions.getSparkVersionSpec().shortVersion).!(procLogger)
835846
if (exit != 0) {
836847
sys.error(
837848
s"[UC] $unityCatalogSetupScript exited with code $exit. Run it manually to see full output.")
@@ -853,13 +864,17 @@ Global / ensurePinnedUnityCatalog := {
853864
sys.props.contains("unityCatalogVersion")
854865
if (unityCatalogReleaseVersion.isEmpty && !usingReleasedVersion) {
855866
val home = file(sys.props("user.home"))
867+
// Canary on the spark artifact, not client/server: those are Spark-version-independent and
868+
// would short-circuit the trigger when only the active Spark version changed, leaving the
869+
// needed unitycatalog-spark_${X.Y}_2.13 unpublished.
870+
val sparkArtifact = s"${unityCatalogSparkArtifactName}_2.13"
856871
// Check both layouts: a restored sbt cache can pre-populate ivy alone, leaving m2 empty -
857872
// checking only ivy would silently skip the slow publish and break mvn-based consumers.
858873
val ivy2Canary = home / ".ivy2" / "local" / "io.unitycatalog" /
859-
"unitycatalog-client" / unityCatalogVersion / "ivys" / "ivy.xml"
874+
sparkArtifact / unityCatalogVersion / "ivys" / "ivy.xml"
860875
val m2Canary = home / ".m2" / "repository" / "io" / "unitycatalog" /
861-
"unitycatalog-client" / unityCatalogVersion /
862-
s"unitycatalog-client-$unityCatalogVersion.pom"
876+
sparkArtifact / unityCatalogVersion /
877+
s"$sparkArtifact-$unityCatalogVersion.pom"
863878
if (!ivy2Canary.exists || !m2Canary.exists) {
864879
publishPinnedUnityCatalog(log, ivy2Canary)
865880
}
@@ -913,7 +928,7 @@ lazy val sparkUnityCatalog = (project in file("spark/unitycatalog"))
913928
"org.projectlombok" % "lombok" % "1.18.34" % "test",
914929

915930
// Unity Catalog dependencies - exclude Jackson to use Spark's Jackson 2.15.x
916-
"io.unitycatalog" %% "unitycatalog-spark" % unityCatalogVersion % "test" excludeAll(
931+
"io.unitycatalog" %% unityCatalogSparkArtifactName % unityCatalogVersion % "test" excludeAll(
917932
ExclusionRule(organization = "com.fasterxml.jackson.core"),
918933
ExclusionRule(organization = "com.fasterxml.jackson.module"),
919934
ExclusionRule(organization = "com.fasterxml.jackson.datatype"),

project/scripts/setup_unitycatalog_main.sh

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,17 @@
4242
# 4. Open a focused PR.
4343
#
4444
# Environment overrides:
45-
# UC_DIR directory to clone into (default: /tmp/unitycatalog)
46-
# UC_REPO git remote URL (default: upstream unitycatalog)
47-
# UC_REF must be `main` or UC_PIN_SHA (default: UC_PIN_SHA below)
48-
# UC_FORCE set to "1" to rebuild even when the Ivy artifact exists
45+
# UC_DIR directory to clone into (default: /tmp/unitycatalog)
46+
# UC_REPO git remote URL (default: upstream unitycatalog)
47+
# UC_REF must be `main` or UC_PIN_SHA (default: UC_PIN_SHA below)
48+
# UC_FORCE set to "1" to rebuild even when the Ivy artifact exists
49+
# SPARK_VERSION Spark major.minor UC should build for (default: 4.1)
50+
# Forwarded as -DsparkVersion to UC's sbt; also determines the published artifact
51+
# name (unitycatalog-spark_${X.Y}_2.13). Delta's build.sbt sets this from
52+
# CrossSparkVersions when invoking the script; matrix CI workflows set it from
53+
# `matrix.spark_version`. Default matches UC's own default in
54+
# project/spark-versions.json - workflows that don't care which Spark variant UC
55+
# builds (kernel/flink/etc.) inherit it.
4956
#
5057
# UC_REF is restricted to exactly two values by design: the pinned SHA (the normal case) or
5158
# `main` (for the floating-main canary flow). Any other value is rejected. CI should never set
@@ -57,14 +64,15 @@ set -euo pipefail
5764
# The pin. Bump both lines together if UC's version.sbt changed at the new SHA. build.sbt's
5865
# `unityCatalogVersion` is obtained by running this script with `--print-version`, so these two
5966
# values are the single source of truth.
60-
UC_PIN_SHA=cd6fec685e9c2ce23fd85552ffac94c7b7e9cbd6
67+
UC_PIN_SHA=af090e73979bc216a0fe8feff59a5bbce0f41f14
6168
UC_BASE_VERSION=0.5.0-SNAPSHOT
6269
# ---------------------------------------------------------------------------------------------
6370

6471
UC_DIR="${UC_DIR:-/tmp/unitycatalog}"
6572
UC_REPO="${UC_REPO:-https://github.com/unitycatalog/unitycatalog.git}"
6673
UC_REF="${UC_REF:-$UC_PIN_SHA}"
6774
UC_FORCE="${UC_FORCE:-0}"
75+
SPARK_VERSION="${SPARK_VERSION:-4.1}"
6876

6977
# Enforce the two-value contract. Anything else is either a typo or a misuse and would bypass the
7078
# safety check below.
@@ -92,15 +100,19 @@ fi
92100
# Canonical Ivy + Maven artifact paths. Delta depends on the four UC modules checked below (client, server, spark, hadoop); sbt resolves from
93101
# ~/.ivy2/local, mvn (kernel-examples integration tests) resolves from ~/.m2/repository. If any
94102
# is missing in either layout we must re-publish.
103+
# UC publishes its Spark connector under a per-Spark-version coordinate
104+
# (e.g. unitycatalog-spark_4.1_2.13). The suffix tracks SPARK_VERSION so the
105+
# canary check matches whatever variant we tell UC to build below.
106+
UC_SPARK_ARTIFACT="unitycatalog-spark_${SPARK_VERSION}_2.13"
95107
IVY_LOCAL="$HOME/.ivy2/local/io.unitycatalog"
96108
IVY_CANARY_CLIENT="$IVY_LOCAL/unitycatalog-client/$UC_VERSION/ivys/ivy.xml"
97109
IVY_CANARY_SERVER="$IVY_LOCAL/unitycatalog-server/$UC_VERSION/ivys/ivy.xml"
98-
IVY_CANARY_SPARK="$IVY_LOCAL/unitycatalog-spark_2.13/$UC_VERSION/ivys/ivy.xml"
110+
IVY_CANARY_SPARK="$IVY_LOCAL/$UC_SPARK_ARTIFACT/$UC_VERSION/ivys/ivy.xml"
99111
IVY_CANARY_HADOOP="$IVY_LOCAL/unitycatalog-hadoop/$UC_VERSION/ivys/ivy.xml"
100112
M2_LOCAL="$HOME/.m2/repository/io/unitycatalog"
101113
M2_CANARY_CLIENT="$M2_LOCAL/unitycatalog-client/$UC_VERSION/unitycatalog-client-$UC_VERSION.pom"
102114
M2_CANARY_SERVER="$M2_LOCAL/unitycatalog-server/$UC_VERSION/unitycatalog-server-$UC_VERSION.pom"
103-
M2_CANARY_SPARK="$M2_LOCAL/unitycatalog-spark_2.13/$UC_VERSION/unitycatalog-spark_2.13-$UC_VERSION.pom"
115+
M2_CANARY_SPARK="$M2_LOCAL/$UC_SPARK_ARTIFACT/$UC_VERSION/$UC_SPARK_ARTIFACT-$UC_VERSION.pom"
104116
M2_CANARY_HADOOP="$M2_LOCAL/unitycatalog-hadoop/$UC_VERSION/unitycatalog-hadoop-$UC_VERSION.pom"
105117
ALL_CANARIES=("$IVY_CANARY_CLIENT" "$IVY_CANARY_SERVER" "$IVY_CANARY_SPARK" "$IVY_CANARY_HADOOP"
106118
"$M2_CANARY_CLIENT" "$M2_CANARY_SERVER" "$M2_CANARY_SPARK" "$M2_CANARY_HADOOP")
@@ -161,9 +173,25 @@ fi
161173
# coordinate. Applied as a persistent setting so it sticks across the two sbt invocations below.
162174
SET_VERSION_CMD="set ThisBuild / version := \"$UC_VERSION\""
163175

176+
# Force publishLocal / publishM2 to overwrite existing artifacts. UC artifacts at the same
177+
# coordinate can be left behind from a prior run (e.g. cross-Spark publish re-invokes this
178+
# script for a different sparkVersion while client/server/hadoop are already in ~/.ivy2/local
179+
# and ~/.m2 from the first invocation). publishLocalConfiguration / publishM2Configuration are
180+
# task settings scoped per-project (ThisBuild / Global don't propagate), so we set them on each
181+
# project we publish. Both configs need overriding: publishLocal uses the former, publishM2
182+
# uses the latter.
183+
SET_OVERWRITE_CMDS=()
184+
for p in client server hadoop spark; do
185+
SET_OVERWRITE_CMDS+=(
186+
"set $p / publishLocalConfiguration := ($p / publishLocalConfiguration).value.withOverwrite(true)"
187+
"set $p / publishM2Configuration := ($p / publishM2Configuration).value.withOverwrite(true)"
188+
)
189+
done
190+
164191
echo ">>> Building and publishing UC client + server to local Maven repo"
165192
./build/sbt \
166193
"$SET_VERSION_CMD" \
194+
"${SET_OVERWRITE_CMDS[@]}" \
167195
"set client / Compile / packageDoc / publishArtifact := false" \
168196
clean \
169197
client/generate \
@@ -175,10 +203,18 @@ echo ">>> Building and publishing UC client + server to local Maven repo"
175203
hadoop/publishM2
176204

177205
# spark/publishM2 can hit a transient coursier lock race - retry up to 3 times.
178-
echo ">>> Building and publishing UC spark module to local Maven repo"
206+
# -DsparkVersion picks the Spark variant UC builds against; the resulting artifact is
207+
# unitycatalog-spark_${SPARK_VERSION}_2.13, matching UC_SPARK_ARTIFACT above.
208+
# -DskipDeltaSpark=true drops UC's test-only delta-spark_X.Y dep, which would otherwise
209+
# fail resolution: we publish UC before Delta itself is built, and on snapshot Spark
210+
# versions (e.g. 4.2) no matching delta-spark release exists at all.
211+
echo ">>> Building and publishing UC spark module (Spark $SPARK_VERSION) to local Maven repo"
179212
for attempt in 1 2 3; do
180213
if ./build/sbt \
214+
-DsparkVersion="$SPARK_VERSION" \
215+
-DskipDeltaSpark=true \
181216
"$SET_VERSION_CMD" \
217+
"${SET_OVERWRITE_CMDS[@]}" \
182218
"set client / Compile / packageDoc / publishArtifact := false" \
183219
spark/publishLocal \
184220
spark/publishM2; then

0 commit comments

Comments
 (0)