Skip to content
Merged
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
71e8539
Basic infrastructure to include DB-17.3 and compile fixes
nartal1 Jan 5, 2026
251aaca
Share common shim code between spark411 and spark400db173
nartal1 Feb 3, 2026
06226ed
Share common shim code between spark400db173 and spark411
nartal1 Feb 3, 2026
3dbf61f
Remove unused imports from spark400db173 files
nartal1 Feb 3, 2026
3fcf4d1
Share GpuArrowAggregatePythonExecMeta and fix XML comment issue
nartal1 Feb 3, 2026
9938e61
Add spark400db173 support to GpuUnionExecShim
nartal1 Feb 3, 2026
d876317
fix GpuBroadcastShimsError
nartal1 Feb 3, 2026
4c0abef
Fix shuffle getReader API for DBR-17.3
nartal1 Feb 3, 2026
c987e0c
Remove unused import
nartal1 Feb 3, 2026
90a55eb
Refactor and fix warnings
nartal1 Feb 4, 2026
a7da91c
Fix 350 build error
nartal1 Feb 9, 2026
6cf0424
fix spark400 build
nartal1 Feb 9, 2026
be6015e
Fix GpuWindowInPandasExec build error
nartal1 Feb 9, 2026
9e41d11
fix spark400 build errors
nartal1 Feb 9, 2026
027d4e7
fix build error for db173
nartal1 Feb 10, 2026
cde5d18
Refactor GpuScalarSubquery
nartal1 Feb 10, 2026
2478b74
Refactor RapidsShuffleManagerShim
nartal1 Feb 10, 2026
01f8fdc
Enable DBR-17.3 shim by default
nartal1 Feb 17, 2026
6336e8d
Implement functions in GpuShuffleExchange
nartal1 Feb 19, 2026
96e8e2c
clean up script
nartal1 Feb 20, 2026
2d8d363
Fix scala style issue
nartal1 Feb 20, 2026
1e408d8
Simplify building the scala2.13 OSS Spark version
nartal1 Feb 20, 2026
8c55648
fix copyright year, extra lines, comments etc
nartal1 Feb 23, 2026
5134b48
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Feb 26, 2026
267a476
remove desupported shims
nartal1 Feb 27, 2026
c032f46
remove/refactor redundant files
nartal1 Feb 27, 2026
75283ec
fix scala style warnings
nartal1 Mar 2, 2026
700ec50
refactor code
nartal1 Mar 2, 2026
db1ca47
refactor code further
nartal1 Mar 2, 2026
21c17d1
split/shim shimExpressions.scala
nartal1 Mar 2, 2026
cc1bd60
fix build error
nartal1 Mar 2, 2026
d0076a0
Fix build error for 14.3 shim
nartal1 Mar 2, 2026
b9b73f9
fix db-14.3 build
nartal1 Mar 2, 2026
f67e7a1
Update copyright year
nartal1 Mar 2, 2026
5ba5017
Fix some tests in array_test.py
nartal1 Mar 3, 2026
11b279d
Update error message to fix tests
nartal1 Mar 3, 2026
defb329
Fix 402 shim
nartal1 Mar 4, 2026
f8fa048
skip tests for db-17.3
nartal1 Mar 4, 2026
52b481f
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Mar 6, 2026
12a66a8
Refactor RapidsShufflemanager to fix classLoader issue
nartal1 Mar 6, 2026
c0869e6
Include 400db143 shim in GpuParquetUtilsShims.scala
nartal1 Mar 6, 2026
0c41555
Remove correct dist target
nartal1 Mar 6, 2026
c0ab81b
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Mar 6, 2026
c17cb0a
Merge branch 'databricks_173_support' of github.com:nartal1/spark-rap…
nartal1 Mar 6, 2026
9f06b88
Fix 358 shim
nartal1 Mar 6, 2026
2011022
addressed review comments
nartal1 Mar 10, 2026
cce248e
address review comments for jenkins scripts
nartal1 Mar 11, 2026
f617d6a
refactor getRuntimeStatistics in QueryStageRowCountShims
nartal1 Mar 11, 2026
f7640f2
Update GpuShuffleExchangeExec to include updatedRepartitioningStatus in
nartal1 Mar 11, 2026
de3cf11
Shim MetricsEventLogValidationSuite
nartal1 Mar 11, 2026
198bfc7
remove dead code
nartal1 Mar 11, 2026
77e7b12
Fix build error
nartal1 Mar 11, 2026
6b62892
Address review comments
nartal1 Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "400db173"}
{"spark": "401"}
{"spark": "402"}
{"spark": "411"}
Expand Down
9 changes: 8 additions & 1 deletion dist/scripts/binary-dedupe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,15 @@ function verify_same_sha_for_unshimmed() {
# but it is compatible with previous versions because it merely adds a new method.
# we might need to replace this strict check with MiMa
# https://github.com/apache/spark/blob/7011706a0a8dbec6adb5b5b121921b29b314335f/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala#L75-L95
# ProxyRapidsShuffleInternalManagerBase is not bitwise-identical when
# DB 17.3 is included because ShuffleManager.getReader signature differs
# (8-param with prismMapStatusEnabled vs 7-param). This is safe because
# the class provides concrete implementations for ALL getReader variants,
# so the JVM resolves the correct one at runtime regardless of which
# ShuffleManager version the class was compiled against.
if [[ ! "$class_file_quoted" =~ com/nvidia/spark/rapids/spark[34].*/.*ShuffleManager.class && \
"$class_file_quoted" != "com/nvidia/spark/ParquetCachedBatchSerializer.class" ]]; then
"$class_file_quoted" != "com/nvidia/spark/ParquetCachedBatchSerializer.class" && \
! "$class_file_quoted" =~ org/apache/spark/sql/rapids/ProxyRapidsShuffleInternalManagerBase ]]; then
if ! grep -q "/spark.\+/$class_file_quoted" "$SPARK_SHARED_TXT"; then
echo >&2 "$class_file is not bitwise-identical across shims"
exit 255
Expand Down
3 changes: 1 addition & 2 deletions integration_tests/src/main/python/array_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ def test_array_item_with_strict_index(strict_index_enabled, index):
@pytest.mark.parametrize('index', [-2, 100, array_neg_index_gen, array_out_index_gen], ids=idfn)
@disable_ansi_mode
def test_array_item_ansi_fail_invalid_index(index):
message = "SparkArrayIndexOutOfBoundsException" if (is_databricks104_or_later() or is_spark_330_or_later() and is_before_spark_400()) else \
"ArrayIndexOutOfBoundsException"
message = "ArrayIndexOutOfBoundsException"
if isinstance(index, int):
test_func = lambda spark: unary_op_df(spark, ArrayGen(int_gen)).select(col('a')[index]).collect()
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "400db173"}
{"spark": "401"}
{"spark": "402"}
{"spark": "411"}
Expand Down
19 changes: 11 additions & 8 deletions jenkins/Jenkinsfile-blossom.premerge-databricks
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pipeline {
// 'name' and 'value' only support literal string in the declarative Jenkins
// Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
name 'DB_RUNTIME'
values '12.2', '13.3', '14.3'
values '12.2', '13.3', '14.3', '17.3'
}
}
stages {
Expand Down Expand Up @@ -184,13 +184,16 @@ void databricksBuild() {
}
}

stage("Test against $SPARK_MAJOR DB") {
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NVIDIA/spark-rapids/pull/11788/files#diff-db28879431d57d0e454a2c7ee89fdda9abdec463c61771333d6a6565bf96c062R52-R55
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
// TODO: Temporarily skip tests on Databricks 17.3 until the test failures are fixed
if (env.DB_RUNTIME != '17.3') {
stage("Test against $SPARK_MAJOR DB") {
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NVIDIA/spark-rapids/pull/11788/files#diff-db28879431d57d0e454a2c7ee89fdda9abdec463c61771333d6a6565bf96c062R52-R55
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
}
}
}
} finally {
Expand Down
42 changes: 34 additions & 8 deletions jenkins/databricks/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,25 @@ initialize()
# the version of Spark used when we install the Databricks jars in .m2
BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS:-$BASE_SPARK_VERSION}
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS}-databricks

# Determine Scala version based on Spark version
# Spark 4.0+ uses Scala 2.13, earlier versions use 2.12
if [[ "$BASE_SPARK_VERSION" == 4.* ]]; then
export SCALA_BINARY_VER=2.13
fi

DBR_VER=$(cat /databricks/DBR_VERSION)
if [ $DBR_VER == '14.3' ]; then
DBR_VER=$(echo $DBR_VER | sed 's/\.//g')
# We are appending 143 in addition to the base spark version because Databricks 14.3
# and Databricks 15.4 are both based on spark version 3.5.0
BUILDVER="$BUILDVER$DBR_VER"
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS="$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS-$DBR_VER"
elif [ $DBR_VER == '17.3' ]; then
DBR_VER=$(echo $DBR_VER | sed 's/\.//g')
# Appending 173 for Databricks 17.3 based on Spark 4.0.0
BUILDVER="$BUILDVER$DBR_VER"
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS="$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS-$DBR_VER"
fi

# pull normal Spark artifacts and ignore errors then install databricks jars, then build again.
Expand All @@ -145,10 +157,18 @@ initialize()
export WORKSPACE=$PWD
# set the retry count for mvn commands
MVN_CMD="mvn -Dmaven.wagon.http.retryHandler.count=3"

# Determine which pom to use based on Scala version
if [[ "$SCALA_BINARY_VER" == "2.13" ]]; then
POM_FILE="scala2.13/pom.xml"
else
POM_FILE="pom.xml"
fi

# getting the versions of CUDA, SCALA and SPARK_PLUGIN
SPARK_PLUGIN_JAR_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout)
SCALA_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout)
CUDA_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=cuda.version -DforceStdout)
SPARK_PLUGIN_JAR_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=project.version -DforceStdout)
SCALA_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=scala.binary.version -DforceStdout)
CUDA_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=cuda.version -DforceStdout)
RAPIDS_BUILT_JAR=rapids-4-spark_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar
# If set to 1, skips installing dependencies into mvn repo.
SKIP_DEP_INSTALL=${SKIP_DEP_INSTALL:-'0'}
Expand Down Expand Up @@ -228,18 +248,24 @@ else
fi

# Build the RAPIDS plugin by running package command for databricks
$MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER $MVN_PHASES -DskipTests $MVN_OPT
$MVN_CMD -B -f $POM_FILE -Ddatabricks -Dbuildver=$BUILDVER $MVN_PHASES -DskipTests $MVN_OPT

if [[ "$WITH_DEFAULT_UPSTREAM_SHIM" != "0" ]]; then
echo "Building the default Spark shim and creating a two-shim dist jar"
UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=buildver -DforceStdout)
$MVN_CMD -B package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \
UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=buildver -DforceStdout)
$MVN_CMD -B -f $POM_FILE -Dbuildver=$UPSTREAM_BUILDVER package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \
-Dincluded_buildvers=$UPSTREAM_BUILDVER,$BUILDVER
fi

# "Delete the unused object files to reduce the size of the Spark Rapids built tar."
rm -rf dist/target/jni-deps/
find dist/target/parallel-world/ -mindepth 1 -maxdepth 1 ! -name META-INF -exec rm -rf {} +
# Determine the correct dist target directory based on which POM was used
if [[ "$SCALA_BINARY_VER" == "2.13" ]]; then
DIST_TARGET="scala2.13/dist/target"
else
DIST_TARGET="dist/target"
fi
rm -rf $DIST_TARGET/jni-deps/
find $DIST_TARGET/parallel-world/ -mindepth 1 -maxdepth 1 ! -name META-INF -exec rm -rf {} +

cd /home/ubuntu
tar -zcf spark-rapids-built.tgz spark-rapids
Expand Down
2 changes: 1 addition & 1 deletion jenkins/databricks/common_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ EOF
fi

# TODO make this standard going forward
if [[ "$SPARK_VER" == '3.5.0' ]]; then
if [[ "$SPARK_VER" == '3.5.0' || "$SPARK_VER" == '4.0.0' ]]; then
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
else
DB_VER_SUFFIX=""
Expand Down
12 changes: 8 additions & 4 deletions jenkins/databricks/deploy.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,14 +24,18 @@ cd spark-rapids
echo "Maven mirror is $MVN_URM_MIRROR"
SERVER_ID='snapshots'
SERVER_URL="$URM_URL-local"
SCALA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout`
SCALA_VERSION=`mvn help:evaluate -q -f $POM_FILE -pl dist -Dexpression=scala.binary.version -DforceStdout`
# remove the periods so change something like 3.2.1 to 321
VERSION_NUM=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS//.}
SPARK_VERSION_STR=spark$VERSION_NUM
SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout`
# Append 143 into the db shim version because Databricks 14.3.x and 15.4.x are both based on spark version 3.5.0
SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -f $POM_FILE -pl dist -Dexpression=project.version -DforceStdout`
# Append 143 or 173 into the db shim version because Databricks 14.3.x and 15.4.x are both based on spark version 3.5.0
# and Databricks 17.3 based on Spark 4.0.0
if [[ "$DB_RUNTIME" == "14.3"* ]]; then
DB_SHIM_NAME="${SPARK_VERSION_STR}db143"
elif [[ "$DB_RUNTIME" == "17.3"* ]]; then
# Databricks 17.3 based on Spark 4.0.0
DB_SHIM_NAME="${SPARK_VERSION_STR}db173"
else
DB_SHIM_NAME="${SPARK_VERSION_STR}db"
fi
Expand Down
59 changes: 45 additions & 14 deletions jenkins/databricks/install_deps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,6 +45,9 @@ def define_deps(spark_version, scala_version):
elif spark_version.startswith('3.5'):
spark_prefix = '----ws_3_5'
mvn_prefix = '--mvn'
elif spark_version.startswith('4.0'):
spark_prefix = '----ws_4_0'
mvn_prefix = '--mvn'
else:
raise Exception(f"Unsupported Databricks version {spark.version}")

Expand Down Expand Up @@ -82,9 +85,17 @@ def define_deps(spark_version, scala_version):
f'{spark_prefix}--sql--hive--hive-{spark_suffix}_*.jar'),
Artifact('org.apache.hive', 'hive-exec',
f'{spark_prefix}--patched-hive-with-glue--hive-exec*.jar'),
Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-{spark_suffix}_deploy.jar'),

]
# hive-metastore-client-patched has different naming pattern in Spark 4.0
if spark_version.startswith('4.0'):
# Spark 4.0: the patch jar uses scala 2.13
deps += [Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-hive-2.3__hadoop-3.2_2.*.jar')]
else:
# Spark 3.x versions
deps += [Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-{spark_suffix}_deploy.jar')]
deps += [
# Hive
Artifact('org.apache.hive', 'hive-serde',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.hive--hive-serde--org.apache.hive__hive-serde__*.jar'),
Expand Down Expand Up @@ -129,8 +140,6 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-core--com.fasterxml.jackson.core__jackson-core__*.jar'),
Artifact('com.fasterxml.jackson.core', 'jackson-annotations',
f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-annotations--com.fasterxml.jackson.core__jackson-annotations__*.jar'),
Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro*.jar' if spark_version.startswith('3.5') else f'{spark_prefix}--vendor--avro--avro-*.jar'),
Artifact('org.apache.avro', 'avro-mapred',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro-mapred--org.apache.avro__avro-mapred__*.jar'),
Artifact('org.apache.avro', 'avro',
Expand All @@ -147,8 +156,19 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--io.netty--netty-handler--io.netty__netty-handler__*.jar'),
]

# spark-avro has different locations depending on Spark version
if spark_version.startswith('4.0'):
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{spark_prefix}--connector--avro--avro-{spark_suffix}_*.jar')]
elif spark_version.startswith('3.5'):
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro*.jar')]
else:
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{spark_prefix}--vendor--avro--avro-*.jar')]

# Parquet
if spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
Artifact('org.apache.parquet', 'parquet-hadoop',
f'{spark_prefix}--third_party--parquet-mr--parquet-hadoop--parquet-hadoop-shaded--*--libparquet-hadoop-internal.jar'),
Expand All @@ -159,7 +179,7 @@ def define_deps(spark_version, scala_version):
Artifact('org.apache.parquet', 'parquet-format',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-format-structures-internal.jar'),
Artifact('shaded.parquet.org.apache.thrift', f'shaded-parquet-thrift_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--org.apache.thrift__libthrift__0.16.0.jar'),
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--org.apache.thrift__libthrift__*.jar'),
Artifact('org.apache.parquet', f'parquet-format-internal_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-thrift.jar')
]
Expand All @@ -177,7 +197,7 @@ def define_deps(spark_version, scala_version):


# log4j-core
if spark_version.startswith('3.3') or spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.3') or spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += Artifact('org.apache.logging.log4j', 'log4j-core',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.logging.log4j--log4j-core--org.apache.logging.log4j__log4j-core__*.jar'),

Expand All @@ -187,19 +207,30 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-parser-combinators_{scala_version}-*.jar')
]

if spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
# Spark Internal Logging
Artifact('org.apache.spark', f'spark-common-utils_{scala_version}', f'{spark_prefix}--common--utils--common-utils-hive-2.3__hadoop-3.2_2.12_deploy.jar'),
Artifact('org.apache.spark', f'spark-common-utils_{scala_version}', f'{spark_prefix}--common--utils--common-utils-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
# Spark SQL API
Artifact('org.apache.spark', f'spark-sql-api_{scala_version}', f'{spark_prefix}--sql--api--sql-api-hive-2.3__hadoop-3.2_2.12_deploy.jar')
Artifact('org.apache.spark', f'spark-sql-api_{scala_version}', f'{spark_prefix}--sql--api--sql-api-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar')
]

if spark_version.startswith('3.5'):
if spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
Artifact('org.scala-lang.modules', f'scala-collection-compat_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-collection-compat_{scala_version}--org.scala-lang.modules__scala-collection-compat_{scala_version}__2.11.0.jar'),
Artifact('org.apache.avro', f'avro-connector', f'{spark_prefix}--connector--avro--avro-hive-2.3__hadoop-3.2_2.12_shaded---606136534--avro-unshaded-hive-2.3__hadoop-3.2_2.12_deploy.jar')
Artifact('org.apache.avro', f'avro-connector', f'{spark_prefix}--connector--avro--avro-hive-2.3__hadoop-3.2_{scala_version}_shaded--*--avro-unshaded-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar')
]

# Databricks 17.3 (Spark 4.0) requires additional common-utils JARs
if spark_version.startswith('4.0'):
deps += [
# Logging class is in common-utils-other
Artifact('org.apache.spark', f'spark-common-utils-other_{scala_version}', f'{spark_prefix}--common--utils--common-utils-other-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
# ConfigEntry and related config classes are in common-config
Artifact('org.apache.spark', f'spark-common-config_{scala_version}', f'{spark_prefix}--common--utils--common-config-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
Artifact('org.apache.orc', 'orc-format',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.orc--orc-format--org.apache.orc__orc-format__*-shaded-protobuf.jar')
]

return deps
Expand Down
Loading
Loading