Skip to content

Commit 347586a

Browse files
Set the temporaryGcsBucket to default to fs.gs.system.bucket if it exists
1 parent a4c20e3 commit 347586a

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

CHANGES.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
## Next
44
* Issue #1290: Stopped using metadata for optimized count path
55
* Issue #1317: Improving OpenLineage 1.24.0+ compatibility
6-
* PR #1311 : Improve read session expired error message
6+
* PR #1311: Improve read session expired error message
7+
* PR #1320: Set the `temporaryGcsBucket` to default to `fs.gs.system.bucket` if it exists, negating the need to set it in Dataproc clusters.
78

89
## 0.41.0 - 2024-09-05
910

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,8 @@ public static WriteMethod from(@Nullable String writeMethod) {
171171
public static final String BIG_NUMERIC_DEFAULT_PRECISION = "bigNumericDefaultPrecision";
172172
public static final String BIG_NUMERIC_DEFAULT_SCALE = "bigNumericDefaultScale";
173173

174+
private static final String DATAPROC_SYSTEM_BUCKET_CONFIGURATION = "fs.gs.system.bucket";
175+
174176
TableId tableId;
175177
// as the config needs to be Serializable, internally it uses
176178
// com.google.common.base.Optional<String> but externally it uses the regular java.util.Optional
@@ -398,7 +400,10 @@ public static SparkBigQueryConfig from(
398400
.orNull();
399401
config.defaultParallelism = defaultParallelism;
400402
config.temporaryGcsBucket =
401-
stripPrefix(getAnyOption(globalOptions, options, "temporaryGcsBucket"));
403+
stripPrefix(getAnyOption(globalOptions, options, "temporaryGcsBucket"))
404+
.or(
405+
com.google.common.base.Optional.fromNullable(
406+
hadoopConfiguration.get(DATAPROC_SYSTEM_BUCKET_CONFIGURATION)));
402407
config.persistentGcsBucket =
403408
stripPrefix(getAnyOption(globalOptions, options, "persistentGcsBucket"));
404409
config.persistentGcsPath = getOption(options, "persistentGcsPath");

spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,4 +1182,22 @@ public void testEnableListInferenceWithDefaultIntermediateFormat() {
11821182
assertThat(config.getIntermediateFormat())
11831183
.isEqualTo(SparkBigQueryConfig.IntermediateFormat.PARQUET_LIST_INFERENCE_ENABLED);
11841184
}
1185+
1186+
@Test
1187+
public void testSystemBucketAsDefaultTemporaryGcsBucket() {
1188+
Configuration hadoopConfiguration = new Configuration();
1189+
hadoopConfiguration.set("fs.gs.system.bucket", "foo");
1190+
SparkBigQueryConfig config =
1191+
SparkBigQueryConfig.from(
1192+
asDataSourceOptionsMap(defaultOptions),
1193+
emptyMap, // allConf
1194+
hadoopConfiguration,
1195+
emptyMap, // customDefaults
1196+
1,
1197+
new SQLConf(),
1198+
sparkVersion,
1199+
/* schema */ Optional.empty(),
1200+
/* tableIsMandatory */ true);
1201+
assertThat(config.getTemporaryGcsBucket()).hasValue("foo");
1202+
}
11851203
}

0 commit comments

Comments
 (0)