Skip to content

Commit 6d348f4

Browse files
Adding Spark 4.1 connector (#1437)
1 parent d6f041b commit 6d348f4

File tree

35 files changed

+1167
-11
lines changed

35 files changed

+1167
-11
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Release Notes
22

33
## Next
4+
* Added new connector, `spark-4.1-bigquery` aimed to be used in Spark 4.1. Like Spark 4.1, this connector requires at
5+
least Java 17 runtime. It is currently in preview mode.
46

57
## 0.43.1 - 2025-10-22
68
* Issue #1417: Fixed ClassCastException in AWS federated identity

cloudbuild/cloudbuild.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,22 @@ steps:
118118
- 'BIGLAKE_CONNECTION_ID=${_BIGLAKE_CONNECTION_ID}'
119119
- 'BIGQUERY_KMS_KEY_NAME=${_BIGQUERY_KMS_KEY_NAME}'
120120

121+
# 4i. Run integration tests concurrently with unit tests (DSv2, Spark 3.5)
122+
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
123+
id: 'integration-tests-4.1'
124+
waitFor: ['integration-tests-3.5']
125+
entrypoint: 'bash'
126+
args: ['/workspace/cloudbuild/presubmit.sh', 'integrationtest-4.1']
127+
env:
128+
- 'GOOGLE_CLOUD_PROJECT=${_GOOGLE_CLOUD_PROJECT}'
129+
- 'TEMPORARY_GCS_BUCKET=${_TEMPORARY_GCS_BUCKET}'
130+
- 'BIGLAKE_CONNECTION_ID=${_BIGLAKE_CONNECTION_ID}'
131+
- 'BIGQUERY_KMS_KEY_NAME=${_BIGQUERY_KMS_KEY_NAME}'
132+
121133
# 5. Upload coverage to CodeCov
122134
- name: 'gcr.io/$PROJECT_ID/dataproc-spark-bigquery-connector-presubmit'
123135
id: 'upload-it-to-codecov'
124-
waitFor: ['integration-tests-2.12','integration-tests-2.13','integration-tests-3.1','integration-tests-3.2','integration-tests-3.3', 'integration-tests-3.4', 'integration-tests-3.5', 'integration-tests-4.0']
136+
waitFor: ['integration-tests-2.12','integration-tests-2.13','integration-tests-3.1','integration-tests-3.2','integration-tests-3.3', 'integration-tests-3.4', 'integration-tests-3.5', 'integration-tests-4.0', 'integration-tests-4.1']
125137
entrypoint: 'bash'
126138
args: ['/workspace/cloudbuild/presubmit.sh', 'upload-it-to-codecov']
127139
env:

cloudbuild/nightly.sh

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,17 @@ case $STEP in
4343
cat toolchains.xml
4444
# Build
4545
export JAVA_HOME=${JAVA17_HOME}
46-
$MVN -T 1C install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
46+
$MVN -T 1C install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
4747
#coverage report
4848
export JAVA_HOME=${JAVA17_HOME}
49-
$MVN -T 1C test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
49+
$MVN -T 1C test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
5050
# Run integration tests
5151
unset MAVEN_OPTS
5252
export JAVA_HOME=${JAVA17_HOME}
53-
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
53+
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
5454
# Run acceptance tests
5555
export JAVA_HOME=${JAVA17_HOME}
56-
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,acceptance,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
56+
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,acceptance,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
5757
# Upload test coverage report to Codecov
5858
bash <(curl -s https://codecov.io/bash) -K -F "nightly"
5959

@@ -91,6 +91,9 @@ case $STEP in
9191
gsutil cp "${M2REPO}/com/google/cloud/spark/spark-4.0-bigquery/${BUILD_REVISION}-preview/spark-4.0-bigquery-${BUILD_REVISION}-preview.jar" "gs://${BUCKET}"
9292
gsutil cp "gs://${BUCKET}/spark-4.0-bigquery-${BUILD_REVISION}-preview.jar" "gs://${BUCKET}/spark-4.0-bigquery-nightly-snapshot.jar"
9393

94+
gsutil cp "${M2REPO}/com/google/cloud/spark/spark-4.1-bigquery/${BUILD_REVISION}-preview/spark-4.1-bigquery-${BUILD_REVISION}-preview.jar" "gs://${BUCKET}"
95+
gsutil cp "gs://${BUCKET}/spark-4.1-bigquery-${BUILD_REVISION}-preview.jar" "gs://${BUCKET}/spark-4.1-bigquery-nightly-snapshot.jar"
96+
9497
gsutil cp "${M2REPO}/com/google/cloud/spark/spark-bigquery-metrics/${BUILD_REVISION}/spark-bigquery-metrics-${BUILD_REVISION}.jar" "gs://${BUCKET}"
9598
gsutil cp "gs://${BUCKET}/spark-bigquery-metrics-${BUILD_REVISION}.jar" "gs://${BUCKET}/spark-bigquery-metrics-nightly-snapshot.jar"
9699

@@ -99,7 +102,7 @@ case $STEP in
99102

100103
deploy)
101104
# TODO: Re-enable deployment for nightly builds.
102-
# $MVN deploy:deploy -DskipTests -Dscala.skipTests=true -Prelease-nightly,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
105+
# $MVN deploy:deploy -DskipTests -Dscala.skipTests=true -Prelease-nightly,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
103106
exit
104107
;;
105108

cloudbuild/presubmit.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ case $STEP in
4141

4242
export MAVEN_OPTS=${BUILD_OPTS}
4343
export JAVA_HOME=${JAVA17_HOME}
44-
$MVN -T 1C install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
44+
$MVN -T 1C install -DskipTests -Pdsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
4545
exit
4646
;;
4747

4848
# Run unit tests
4949
unittest)
5050
export MAVEN_OPTS=${BUILD_OPTS}
5151
export JAVA_HOME=${JAVA17_HOME}
52-
$MVN -T 1C test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0
52+
$MVN -T 1C test jacoco:report jacoco:report-aggregate -Pcoverage,dsv1_2.12,dsv1_2.13,dsv2_3.1,dsv2_3.2,dsv2_3.3,dsv2_3.4,dsv2_3.5,dsv2_4.0,dsv2_4.1
5353
# Upload test coverage report to Codecov
5454
bash <(curl -s https://codecov.io/bash) -K -F "${STEP}"
5555
;;
@@ -98,6 +98,11 @@ case $STEP in
9898
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv2_4.0
9999
;;
100100

101+
# Run integration tests
102+
integrationtest-4.1)
103+
$MVN failsafe:integration-test failsafe:verify jacoco:report jacoco:report-aggregate -Pcoverage,integration,dsv2_4.1
104+
;;
105+
101106
upload-it-to-codecov)
102107
checkenv
103108
# Upload test coverage report to Codecov

coverage/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,19 @@
303303
</dependency>
304304
</dependencies>
305305
</profile>
306+
<profile>
307+
<id>dsv2_4.1</id>
308+
<activation>
309+
<activeByDefault>false</activeByDefault>
310+
</activation>
311+
<dependencies>
312+
<dependency>
313+
<groupId>${project.groupId}</groupId>
314+
<artifactId>spark-4.1-bigquery</artifactId>
315+
<version>${project.version}-preview</version>
316+
</dependency>
317+
</dependencies>
318+
</profile>
306319
</profiles>
307320
</project>
308321

pom.xml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,25 @@
234234
<module>spark-bigquery-dsv2/spark-4.0-bigquery</module>
235235
</modules>
236236
</profile>
237+
<profile>
238+
<id>dsv2_4.1</id>
239+
<activation>
240+
<activeByDefault>false</activeByDefault>
241+
</activation>
242+
<modules>
243+
<module>spark-bigquery-dsv2/spark-bigquery-dsv2-common</module>
244+
<module>spark-bigquery-dsv2/spark-bigquery-dsv2-parent</module>
245+
<module>spark-bigquery-dsv2/spark-3.1-bigquery-lib</module>
246+
<module>spark-bigquery-dsv2/spark-bigquery-metrics</module>
247+
<module>spark-bigquery-dsv2/spark-3.2-bigquery-lib</module>
248+
<module>spark-bigquery-dsv2/spark-3.3-bigquery-lib</module>
249+
<module>spark-bigquery-dsv2/spark-3.4-bigquery-lib</module>
250+
<module>spark-bigquery-dsv2/spark-3.5-bigquery-lib</module>
251+
<module>spark-bigquery-dsv2/spark-4.0-bigquery-lib</module>
252+
<module>spark-bigquery-dsv2/spark-4.1-bigquery-lib</module>
253+
<module>spark-bigquery-dsv2/spark-4.1-bigquery</module>
254+
</modules>
255+
</profile>
237256
<profile>
238257
<id>coverage</id>
239258
<activation>

spark-bigquery-dsv2/pom.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
<modules>
1616
<module>spark-bigquery-dsv2-common</module>
1717
<module>spark-bigquery-dsv2-parent</module>
18-
<module>spark-2.4-bigquery</module>
1918
<module>spark-3.1-bigquery-lib</module>
2019
<module>spark-3.1-bigquery</module>
2120
<module>spark-bigquery-metrics</module>
@@ -29,5 +28,7 @@
2928
<module>spark-3.5-bigquery</module>
3029
<module>spark-4.0-bigquery-lib</module>
3130
<module>spark-4.0-bigquery</module>
31+
<module>spark-4.1-bigquery-lib</module>
32+
<module>spark-4.1-bigquery</module>
3233
</modules>
3334
</project>
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
com.google.cloud.spark.bigquery.v2.Spark35BigQueryTableProvider
1+
com.google.cloud.spark.bigquery.v2.Spark40BigQueryTableProvider
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0"
2+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4+
<modelVersion>4.0.0</modelVersion>
5+
<parent>
6+
<groupId>com.google.cloud.spark</groupId>
7+
<artifactId>spark-bigquery-dsv2-parent</artifactId>
8+
<version>${revision}</version>
9+
<relativePath>../spark-bigquery-dsv2-parent</relativePath>
10+
</parent>
11+
12+
<artifactId>spark-4.1-bigquery-lib</artifactId>
13+
<version>${revision}-preview</version>
14+
<name>Connector code for BigQuery DataSource v2 for Spark 4.1</name>
15+
<properties>
16+
<maven.compiler.release>17</maven.compiler.release>
17+
<spark.version>4.1.0-preview3</spark.version>
18+
<scala.binary.version>2.13</scala.binary.version>
19+
<shade.skip>true</shade.skip>
20+
<toolchain.jdk.version>[17,18)</toolchain.jdk.version>
21+
<argLine>${jdk11plus.argLine}</argLine>
22+
</properties>
23+
<licenses>
24+
<license>
25+
<name>Apache License, Version 2.0</name>
26+
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
27+
<distribution>repo</distribution>
28+
</license>
29+
</licenses>
30+
<dependencyManagement>
31+
<dependencies>
32+
<dependency>
33+
<groupId>org.apache.arrow</groupId>
34+
<artifactId>arrow-bom</artifactId>
35+
<version>${arrow.spark4.version}</version>
36+
<scope>import</scope>
37+
<type>pom</type>
38+
</dependency>
39+
<dependency>
40+
<groupId>org.apache.arrow</groupId>
41+
<artifactId>arrow-memory-netty-buffer-patch</artifactId>
42+
<version>${arrow.spark4.version}</version>
43+
</dependency>
44+
</dependencies>
45+
</dependencyManagement>
46+
<dependencies>
47+
<dependency>
48+
<groupId>${project.groupId}</groupId>
49+
<artifactId>spark-4.0-bigquery-lib</artifactId>
50+
<version>${project.parent.version}-preview</version>
51+
</dependency>
52+
<dependency>
53+
<groupId>org.apache.spark</groupId>
54+
<artifactId>spark-avro_2.13</artifactId>
55+
<version>${spark.version}</version>
56+
<scope>test</scope>
57+
</dependency>
58+
</dependencies>
59+
</project>
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright 2025 Google Inc. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.spark.bigquery.v2;
17+
18+
import com.google.inject.Injector;
19+
import java.util.function.Supplier;
20+
import org.apache.spark.sql.SaveMode;
21+
import org.apache.spark.sql.connector.write.LogicalWriteInfo;
22+
import org.apache.spark.sql.connector.write.WriteBuilder;
23+
import org.apache.spark.sql.types.StructType;
24+
25+
public class Spark41BigQueryTable extends Spark40BigQueryTable {
26+
public Spark41BigQueryTable(Injector injector, Supplier<StructType> schemaSupplier) {
27+
super(injector, schemaSupplier);
28+
}
29+
30+
@Override
31+
public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {
32+
// SaveMode is not provided by spark 3, it is handled by the DataFrameWriter
33+
// The case where mode == SaveMode.Ignore is handled by Spark, so we can assume we can get the
34+
// context
35+
return new Spark41BigQueryWriteBuilder(injector, info, SaveMode.Append);
36+
}
37+
}

0 commit comments

Comments
 (0)