Skip to content

[Spark] Advertise SUPPORT_COLUMN_DEFAULT_VALUE catalog capability #16282

[Spark] Advertise SUPPORT_COLUMN_DEFAULT_VALUE catalog capability

[Spark] Advertise SUPPORT_COLUMN_DEFAULT_VALUE catalog capability #16282

name: "Delta Spark Python"

# Triggers: pushes to, and pull requests targeting, `master` or any branch
# matching `branch-*`. Changes to Markdown (`**.md`) and text (`**.txt`) files
# are listed under `paths-ignore` for both events.
on:
  push:
    branches: [master, branch-*]
    paths-ignore:
      - '**.md'
      - '**.txt'
  pull_request:
    branches: [master, branch-*]
    paths-ignore:
      - '**.md'
      - '**.txt'

# Least privilege: the steps visible in this workflow only check out the
# repository and run builds/tests, so the token is limited to read access.
# NOTE(review): widen this if a step ever needs to write (comments, releases, ...).
permissions:
  contents: read

env:
  # SECURITY: Temporal lockdown — refuse any package version published after this date.
  # This date is a pre-attack baseline (before the active PyPI supply chain attack).
  # Quoted so the timestamp is always passed through as a plain string.
  UV_EXCLUDE_NEWER: "2026-03-10T00:00:00Z"
jobs:
  # Generate Spark versions matrix from CrossSparkVersions.scala
  # This workflow tests against released versions only (no snapshots)
  generate-matrix:
    name: "Generate Released Spark Versions Matrix"
    runs-on: ubuntu-24.04
    outputs:
      # JSON value consumed by the `test` job's matrix via fromJson().
      spark_versions: ${{ steps.generate.outputs.spark_versions }}
    steps:
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744  # v3.6.0
      - name: install java
        uses: actions/setup-java@17f84c3641ba7b8f6deff6309fc4c864478f5d62  # v3.14.1
        with:
          distribution: "zulu"
          java-version: "17"
      - name: Generate released Spark versions matrix
        id: generate
        run: |
          # Get only released versions (exclude snapshots)
          SPARK_VERSIONS=$(python3 project/scripts/get_spark_version_info.py --released-spark-versions)
          # Publish the list as a step output; the path is quoted so it is never word-split.
          echo "spark_versions=$SPARK_VERSIONS" >> "$GITHUB_OUTPUT"
          echo "Generated released Spark versions: $SPARK_VERSIONS"

  # Run the Python test group once per released Spark version.
  test:
    name: "DSP (${{ matrix.spark_version }})"
    runs-on: ubuntu-24.04
    needs: generate-matrix
    strategy:
      matrix:
        # Spark versions are dynamically generated - released versions only
        spark_version: ${{ fromJson(needs.generate-matrix.outputs.spark_versions) }}
        # These Scala versions must match those in the build.sbt
        scala: ["2.13.16"]
    env:
      # Exported to every step so shell scripts can read the matrix values from the
      # environment instead of having workflow expressions spliced into the script text.
      SCALA_VERSION: ${{ matrix.scala }}
      SPARK_VERSION: ${{ matrix.spark_version }}
    steps:
      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744  # v3.6.0
      - name: Get Spark version details
        id: spark-details
        run: |
          # Without pipefail a failing lookup script would be masked by jq's exit
          # status and the step would silently emit empty outputs.
          set -o pipefail
          # Get JVM version and full Spark version for this matrix entry
          JVM_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field "$SPARK_VERSION" targetJvm | jq -r '.')
          FULL_VERSION=$(python3 project/scripts/get_spark_version_info.py --get-field "$SPARK_VERSION" fullVersion | jq -r '.')
          echo "jvm_version=$JVM_VERSION" >> "$GITHUB_OUTPUT"
          echo "spark_full_version=$FULL_VERSION" >> "$GITHUB_OUTPUT"
          echo "Using JVM $JVM_VERSION for Spark $SPARK_VERSION (full: $FULL_VERSION)"
      - name: install java
        uses: actions/setup-java@17f84c3641ba7b8f6deff6309fc4c864478f5d62  # v3.14.1
        with:
          distribution: "zulu"
          java-version: ${{ steps.spark-details.outputs.jvm_version }}
      - name: Cache Scala, SBT
        uses: actions/cache@6f8efc29b200d32929f49075959781ed54ec270c  # v3.5.0
        with:
          path: |
            ~/.sbt
            ~/.ivy2
            ~/.cache/coursier
          # Change the key if dependencies are changed. For each key, GitHub Actions will cache
          # the above directories when we use the key for the first time. After that, each run
          # will just use the cache. The cache is immutable so we need to use a new key when
          # trying to cache new stuff.
          key: delta-sbt-cache-spark${{ matrix.spark_version }}-scala${{ matrix.scala }}
      - name: Set up uv
        run: bash project/scripts/install-uv.sh
      - name: Set up buf
        run: bash project/scripts/install-buf.sh
      - name: Install Python and dependencies
        env:
          # Passed through the environment rather than interpolated into the script.
          SPARK_FULL_VERSION: ${{ steps.spark-details.outputs.spark_full_version }}
        run: |
          uv python install 3.10
          uv venv .venv --python 3.10
          # Install hash-verified locked dependencies (see .github/ci-requirements/spark-python/)
          # Spark 4.0 has its own lock file; every other version uses the 4.1 lock.
          if [[ "$SPARK_VERSION" == "4.0" ]]; then
            LOCK_FILE=.github/ci-requirements/spark-python/spark4.0.lock
          else
            LOCK_FILE=.github/ci-requirements/spark-python/spark4.1.lock
          fi
          uv pip install --python .venv/bin/python --require-hashes -r "$LOCK_FILE"
          # pyspark installed with --no-deps: its only transitive dep (py4j) is in the lock file
          uv pip install --python .venv/bin/python --no-deps "pyspark==${SPARK_FULL_VERSION}"
      - name: Run Python tests
        # when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_test.yaml
        run: |
          source .venv/bin/activate
          TEST_PARALLELISM_COUNT=4 python run-tests.py --group spark-python --spark-version "$SPARK_VERSION"