Skip to content
Merged
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
71e8539
Basic infrastructure to include DB-17.3 and compile fixes
nartal1 Jan 5, 2026
251aaca
Share common shim code between spark411 and spark400db173
nartal1 Feb 3, 2026
06226ed
Share common shim code between spark400db173 and spark411
nartal1 Feb 3, 2026
3dbf61f
Remove unused imports from spark400db173 files
nartal1 Feb 3, 2026
3fcf4d1
Share GpuArrowAggregatePythonExecMeta and fix XML comment issue
nartal1 Feb 3, 2026
9938e61
Add spark400db173 support to GpuUnionExecShim
nartal1 Feb 3, 2026
d876317
fix GpuBroadcastShimsError
nartal1 Feb 3, 2026
4c0abef
Fix shuffle getReader API for DBR-17.3
nartal1 Feb 3, 2026
c987e0c
Remove unused import
nartal1 Feb 3, 2026
90a55eb
Refactor and fix warnings
nartal1 Feb 4, 2026
a7da91c
Fix 350 build error
nartal1 Feb 9, 2026
6cf0424
fix spark400 build
nartal1 Feb 9, 2026
be6015e
Fix GpuWindowInPandasExec build error
nartal1 Feb 9, 2026
9e41d11
fix spark400 build errors
nartal1 Feb 9, 2026
027d4e7
fix build error for db173
nartal1 Feb 10, 2026
cde5d18
Refactor GpuScalarSubquery
nartal1 Feb 10, 2026
2478b74
Refactor RapidsShuffleManagerShim
nartal1 Feb 10, 2026
01f8fdc
Enable DBR-17.3 shim by default
nartal1 Feb 17, 2026
6336e8d
Implement functions in GpuShuffleExchange
nartal1 Feb 19, 2026
96e8e2c
clean up script
nartal1 Feb 20, 2026
2d8d363
Fix scala style issue
nartal1 Feb 20, 2026
1e408d8
Simplify building the scala2.13 OSS Spark version
nartal1 Feb 20, 2026
8c55648
fix copyright year, extra lines, comments etc
nartal1 Feb 23, 2026
5134b48
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Feb 26, 2026
267a476
remove desupported shims
nartal1 Feb 27, 2026
c032f46
remove/refactor redundant files
nartal1 Feb 27, 2026
75283ec
fix scala style warnings
nartal1 Mar 2, 2026
700ec50
refactor code
nartal1 Mar 2, 2026
db1ca47
refactor code further
nartal1 Mar 2, 2026
21c17d1
split/shim shimExpressions.scala
nartal1 Mar 2, 2026
cc1bd60
fix build error
nartal1 Mar 2, 2026
d0076a0
Fix build error for 14.3 shim
nartal1 Mar 2, 2026
b9b73f9
fix db-14.3 build
nartal1 Mar 2, 2026
f67e7a1
Update copyright year
nartal1 Mar 2, 2026
5ba5017
Fix some tests in array_test.py
nartal1 Mar 3, 2026
11b279d
Update error message to fix tests
nartal1 Mar 3, 2026
defb329
Fix 402 shim
nartal1 Mar 4, 2026
f8fa048
skip tests for db-17.3
nartal1 Mar 4, 2026
52b481f
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Mar 6, 2026
12a66a8
Refactor RapidsShufflemanager to fix classLoader issue
nartal1 Mar 6, 2026
c0869e6
Include 400db143 shim in GpuParquetUtilsShims.scala
nartal1 Mar 6, 2026
0c41555
Remove correct dist target
nartal1 Mar 6, 2026
c0ab81b
Merge branch 'main' of github.com:NVIDIA/spark-rapids into databricks…
nartal1 Mar 6, 2026
c17cb0a
Merge branch 'databricks_173_support' of github.com:nartal1/spark-rap…
nartal1 Mar 6, 2026
9f06b88
Fix 358 shim
nartal1 Mar 6, 2026
2011022
addressed review comments
nartal1 Mar 10, 2026
cce248e
address review comments for jenkins scripts
nartal1 Mar 11, 2026
f617d6a
refactor getRuntimeStatistics in QueryStageRowCountShims
nartal1 Mar 11, 2026
f7640f2
Update GpuShuffleExchangeExec to include updatedRepartitioningStatus in
nartal1 Mar 11, 2026
de3cf11
Shim MetricsEventLogValidationSuite
nartal1 Mar 11, 2026
198bfc7
remove dead code
nartal1 Mar 11, 2026
77e7b12
Fix build error
nartal1 Mar 11, 2026
6b62892
Address review comments
nartal1 Mar 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "400db173"}
{"spark": "401"}
{"spark": "402"}
{"spark": "411"}
Expand Down
9 changes: 8 additions & 1 deletion dist/scripts/binary-dedupe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,15 @@ function verify_same_sha_for_unshimmed() {
# but it is compatible with previous versions because it merely adds a new method.
# we might need to replace this strict check with MiMa
# https://github.com/apache/spark/blob/7011706a0a8dbec6adb5b5b121921b29b314335f/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala#L75-L95
# ProxyRapidsShuffleInternalManagerBase is not bitwise-identical when
# DB 17.3 is included because ShuffleManager.getReader signature differs
# (8-param with prismMapStatusEnabled vs 7-param). This is safe because
# the class provides concrete implementations for ALL getReader variants,
# so the JVM resolves the correct one at runtime regardless of which
# ShuffleManager version the class was compiled against.
if [[ ! "$class_file_quoted" =~ com/nvidia/spark/rapids/spark[34].*/.*ShuffleManager.class && \
"$class_file_quoted" != "com/nvidia/spark/ParquetCachedBatchSerializer.class" ]]; then
"$class_file_quoted" != "com/nvidia/spark/ParquetCachedBatchSerializer.class" && \
! "$class_file_quoted" =~ org/apache/spark/sql/rapids/ProxyRapidsShuffleInternalManagerBase ]]; then
if ! grep -q "/spark.\+/$class_file_quoted" "$SPARK_SHARED_TXT"; then
echo >&2 "$class_file is not bitwise-identical across shims"
exit 255
Expand Down
3 changes: 1 addition & 2 deletions integration_tests/src/main/python/array_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ def test_array_item_with_strict_index(strict_index_enabled, index):
@pytest.mark.parametrize('index', [-2, 100, array_neg_index_gen, array_out_index_gen], ids=idfn)
@disable_ansi_mode
def test_array_item_ansi_fail_invalid_index(index):
message = "SparkArrayIndexOutOfBoundsException" if (is_databricks104_or_later() or is_spark_330_or_later() and is_before_spark_400()) else \
"ArrayIndexOutOfBoundsException"
message = "ArrayIndexOutOfBoundsException"
if isinstance(index, int):
test_func = lambda spark: unary_op_df(spark, ArrayGen(int_gen)).select(col('a')[index]).collect()
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

/*** spark-rapids-shim-json-lines
{"spark": "400"}
{"spark": "400db173"}
{"spark": "401"}
{"spark": "402"}
{"spark": "411"}
Expand Down
19 changes: 11 additions & 8 deletions jenkins/Jenkinsfile-blossom.premerge-databricks
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pipeline {
// 'name' and 'value' only support literal string in the declarative Jenkins
// Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
name 'DB_RUNTIME'
values '12.2', '13.3', '14.3'
values '12.2', '13.3', '14.3', '17.3'
}
}
stages {
Expand Down Expand Up @@ -184,13 +184,16 @@ void databricksBuild() {
}
}

stage("Test against $SPARK_MAJOR DB") {
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NVIDIA/spark-rapids/pull/11788/files#diff-db28879431d57d0e454a2c7ee89fdda9abdec463c61771333d6a6565bf96c062R52-R55
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
// TODO: Temporarily skip tests on Databricks 17.3 until the test failures are fixed
if (env.DB_RUNTIME != '17.3') {
stage("Test against $SPARK_MAJOR DB") {
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NVIDIA/spark-rapids/pull/11788/files#diff-db28879431d57d0e454a2c7ee89fdda9abdec463c61771333d6a6565bf96c062R52-R55
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
}
}
}
} finally {
Expand Down
42 changes: 34 additions & 8 deletions jenkins/databricks/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,25 @@ initialize()
# the version of Spark used when we install the Databricks jars in .m2
BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS:-$BASE_SPARK_VERSION}
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS}-databricks

# Determine Scala version based on Spark version
# Spark 4.0+ uses Scala 2.13, earlier versions use 2.12
if [[ "$BASE_SPARK_VERSION" == 4.* ]]; then
export SCALA_BINARY_VER=2.13
fi

DBR_VER=$(cat /databricks/DBR_VERSION)
if [ $DBR_VER == '14.3' ]; then
DBR_VER=$(echo $DBR_VER | sed 's/\.//g')
# We are appending 143 in addition to the base spark version because Databricks 14.3
# and Databricks 15.4 are both based on spark version 3.5.0
BUILDVER="$BUILDVER$DBR_VER"
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS="$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS-$DBR_VER"
elif [ $DBR_VER == '17.3' ]; then
DBR_VER=$(echo $DBR_VER | sed 's/\.//g')
# Appending 173 for Databricks 17.3 based on Spark 4.0.0
BUILDVER="$BUILDVER$DBR_VER"
SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS="$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS-$DBR_VER"
fi

# pull normal Spark artifacts and ignore errors then install databricks jars, then build again.
Expand All @@ -145,10 +157,18 @@ initialize()
export WORKSPACE=$PWD
# set the retry count for mvn commands
MVN_CMD="mvn -Dmaven.wagon.http.retryHandler.count=3"

# Determine which pom to use based on Scala version
if [[ "$SCALA_BINARY_VER" == "2.13" ]]; then
POM_FILE="scala2.13/pom.xml"
else
POM_FILE="pom.xml"
fi

# getting the versions of CUDA, SCALA and SPARK_PLUGIN
SPARK_PLUGIN_JAR_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout)
SCALA_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout)
CUDA_VERSION=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=cuda.version -DforceStdout)
SPARK_PLUGIN_JAR_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=project.version -DforceStdout)
SCALA_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=scala.binary.version -DforceStdout)
CUDA_VERSION=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=cuda.version -DforceStdout)
RAPIDS_BUILT_JAR=rapids-4-spark_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar
# If set to 1, skips installing dependencies into mvn repo.
SKIP_DEP_INSTALL=${SKIP_DEP_INSTALL:-'0'}
Expand Down Expand Up @@ -228,18 +248,24 @@ else
fi

# Build the RAPIDS plugin by running package command for databricks
$MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER $MVN_PHASES -DskipTests $MVN_OPT
$MVN_CMD -B -f $POM_FILE -Ddatabricks -Dbuildver=$BUILDVER $MVN_PHASES -DskipTests $MVN_OPT

if [[ "$WITH_DEFAULT_UPSTREAM_SHIM" != "0" ]]; then
echo "Building the default Spark shim and creating a two-shim dist jar"
UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=buildver -DforceStdout)
$MVN_CMD -B package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \
UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -f $POM_FILE -pl dist -Dexpression=buildver -DforceStdout)
$MVN_CMD -B -f $POM_FILE -Dbuildver=$UPSTREAM_BUILDVER package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \
-Dincluded_buildvers=$UPSTREAM_BUILDVER,$BUILDVER
fi

# "Delete the unused object files to reduce the size of the Spark Rapids built tar."
rm -rf dist/target/jni-deps/
find dist/target/parallel-world/ -mindepth 1 -maxdepth 1 ! -name META-INF -exec rm -rf {} +
# Determine the correct dist target directory based on which POM was used
if [[ "$SCALA_BINARY_VER" == "2.13" ]]; then
DIST_TARGET="scala2.13/dist/target"
else
DIST_TARGET="dist/target"
fi
rm -rf $DIST_TARGET/jni-deps/
find $DIST_TARGET/parallel-world/ -mindepth 1 -maxdepth 1 ! -name META-INF -exec rm -rf {} +

cd /home/ubuntu
tar -zcf spark-rapids-built.tgz spark-rapids
Expand Down
2 changes: 1 addition & 1 deletion jenkins/databricks/common_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ EOF
fi

# TODO make this standard going forward
if [[ "$SPARK_VER" == '3.5.0' ]]; then
if [[ "$SPARK_VER" == '3.5.0' || "$SPARK_VER" == '4.0.0' ]]; then
DB_VER_SUFFIX="${PYSP_TEST_spark_databricks_clusterUsageTags_sparkVersion//./}"
else
DB_VER_SUFFIX=""
Expand Down
12 changes: 8 additions & 4 deletions jenkins/databricks/deploy.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,14 +24,18 @@ cd spark-rapids
echo "Maven mirror is $MVN_URM_MIRROR"
SERVER_ID='snapshots'
SERVER_URL="$URM_URL-local"
SCALA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout`
SCALA_VERSION=`mvn help:evaluate -q -f $POM_FILE -pl dist -Dexpression=scala.binary.version -DforceStdout`
# remove the periods so change something like 3.2.1 to 321
VERSION_NUM=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS//.}
SPARK_VERSION_STR=spark$VERSION_NUM
SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout`
# Append 143 into the db shim version because Databricks 14.3.x and 15.4.x are both based on spark version 3.5.0
SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -f $POM_FILE -pl dist -Dexpression=project.version -DforceStdout`
# Append 143 or 173 into the db shim version because Databricks 14.3.x and 15.4.x are both based on spark version 3.5.0
# and Databricks 17.3 based on Spark 4.0.0
if [[ "$DB_RUNTIME" == "14.3"* ]]; then
DB_SHIM_NAME="${SPARK_VERSION_STR}db143"
elif [[ "$DB_RUNTIME" == "17.3"* ]]; then
# Databricks 17.3 based on Spark 4.0.0
DB_SHIM_NAME="${SPARK_VERSION_STR}db173"
else
DB_SHIM_NAME="${SPARK_VERSION_STR}db"
fi
Expand Down
59 changes: 45 additions & 14 deletions jenkins/databricks/install_deps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,6 +45,9 @@ def define_deps(spark_version, scala_version):
elif spark_version.startswith('3.5'):
spark_prefix = '----ws_3_5'
mvn_prefix = '--mvn'
elif spark_version.startswith('4.0'):
spark_prefix = '----ws_4_0'
mvn_prefix = '--mvn'
else:
raise Exception(f"Unsupported Databricks version {spark.version}")

Expand Down Expand Up @@ -82,9 +85,17 @@ def define_deps(spark_version, scala_version):
f'{spark_prefix}--sql--hive--hive-{spark_suffix}_*.jar'),
Artifact('org.apache.hive', 'hive-exec',
f'{spark_prefix}--patched-hive-with-glue--hive-exec*.jar'),
Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-{spark_suffix}_deploy.jar'),

]
# hive-metastore-client-patched has different naming pattern in Spark 4.0
if spark_version.startswith('4.0'):
# Spark 4.0: the patch jar uses scala 2.13
deps += [Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-hive-2.3__hadoop-3.2_2.*.jar')]
else:
# Spark 3.x versions
deps += [Artifact('org.apache.hive', 'hive-metastore-client-patched',
f'{spark_prefix}--patched-hive-with-glue--hive-*-patch-{spark_suffix}_deploy.jar')]
deps += [
# Hive
Artifact('org.apache.hive', 'hive-serde',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.hive--hive-serde--org.apache.hive__hive-serde__*.jar'),
Expand Down Expand Up @@ -129,8 +140,6 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-core--com.fasterxml.jackson.core__jackson-core__*.jar'),
Artifact('com.fasterxml.jackson.core', 'jackson-annotations',
f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-annotations--com.fasterxml.jackson.core__jackson-annotations__*.jar'),
Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro*.jar' if spark_version.startswith('3.5') else f'{spark_prefix}--vendor--avro--avro-*.jar'),
Artifact('org.apache.avro', 'avro-mapred',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro-mapred--org.apache.avro__avro-mapred__*.jar'),
Artifact('org.apache.avro', 'avro',
Expand All @@ -147,8 +156,19 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--io.netty--netty-handler--io.netty__netty-handler__*.jar'),
]

# spark-avro has different locations depending on Spark version
if spark_version.startswith('4.0'):
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{spark_prefix}--connector--avro--avro-{spark_suffix}_*.jar')]
elif spark_version.startswith('3.5'):
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro*.jar')]
else:
deps += [Artifact('org.apache.spark', f'spark-avro_{scala_version}',
f'{spark_prefix}--vendor--avro--avro-*.jar')]

# Parquet
if spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
Artifact('org.apache.parquet', 'parquet-hadoop',
f'{spark_prefix}--third_party--parquet-mr--parquet-hadoop--parquet-hadoop-shaded--*--libparquet-hadoop-internal.jar'),
Expand All @@ -159,7 +179,7 @@ def define_deps(spark_version, scala_version):
Artifact('org.apache.parquet', 'parquet-format',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-format-structures-internal.jar'),
Artifact('shaded.parquet.org.apache.thrift', f'shaded-parquet-thrift_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--org.apache.thrift__libthrift__0.16.0.jar'),
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--org.apache.thrift__libthrift__*.jar'),
Artifact('org.apache.parquet', f'parquet-format-internal_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-thrift.jar')
]
Expand All @@ -177,7 +197,7 @@ def define_deps(spark_version, scala_version):


# log4j-core
if spark_version.startswith('3.3') or spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.3') or spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += Artifact('org.apache.logging.log4j', 'log4j-core',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.logging.log4j--log4j-core--org.apache.logging.log4j__log4j-core__*.jar'),

Expand All @@ -187,19 +207,30 @@ def define_deps(spark_version, scala_version):
f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-parser-combinators_{scala_version}-*.jar')
]

if spark_version.startswith('3.4') or spark_version.startswith('3.5'):
if spark_version.startswith('3.4') or spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
# Spark Internal Logging
Artifact('org.apache.spark', f'spark-common-utils_{scala_version}', f'{spark_prefix}--common--utils--common-utils-hive-2.3__hadoop-3.2_2.12_deploy.jar'),
Artifact('org.apache.spark', f'spark-common-utils_{scala_version}', f'{spark_prefix}--common--utils--common-utils-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
# Spark SQL API
Artifact('org.apache.spark', f'spark-sql-api_{scala_version}', f'{spark_prefix}--sql--api--sql-api-hive-2.3__hadoop-3.2_2.12_deploy.jar')
Artifact('org.apache.spark', f'spark-sql-api_{scala_version}', f'{spark_prefix}--sql--api--sql-api-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar')
]

if spark_version.startswith('3.5'):
if spark_version.startswith('3.5') or spark_version.startswith('4.0'):
deps += [
Artifact('org.scala-lang.modules', f'scala-collection-compat_{scala_version}',
f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-collection-compat_{scala_version}--org.scala-lang.modules__scala-collection-compat_{scala_version}__2.11.0.jar'),
Artifact('org.apache.avro', f'avro-connector', f'{spark_prefix}--connector--avro--avro-hive-2.3__hadoop-3.2_2.12_shaded---606136534--avro-unshaded-hive-2.3__hadoop-3.2_2.12_deploy.jar')
Artifact('org.apache.avro', f'avro-connector', f'{spark_prefix}--connector--avro--avro-hive-2.3__hadoop-3.2_{scala_version}_shaded--*--avro-unshaded-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar')
]

# Databricks 17.3 (Spark 4.0) requires additional common-utils JARs
if spark_version.startswith('4.0'):
deps += [
# Logging class is in common-utils-other
Artifact('org.apache.spark', f'spark-common-utils-other_{scala_version}', f'{spark_prefix}--common--utils--common-utils-other-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
# ConfigEntry and related config classes are in common-config
Artifact('org.apache.spark', f'spark-common-config_{scala_version}', f'{spark_prefix}--common--utils--common-config-hive-2.3__hadoop-3.2_{scala_version}_deploy.jar'),
Artifact('org.apache.orc', 'orc-format',
f'{prefix_ws_sp_mvn_hadoop}--org.apache.orc--orc-format--org.apache.orc__orc-format__*-shaded-protobuf.jar')
]

return deps
Expand Down
Loading
Loading