Skip to content

Commit 83c83e7

Browse files
authored
Integration tests with require_partition_filter = true for DPO time partitioned tables (#1322)
1 parent b842be6 commit 83c83e7

File tree

1 file changed

+38 -30 lines changed

spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/WriteIntegrationTestBase.java

Lines changed: 38 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1579,7 +1579,8 @@ public void testWriteToTimestampField() {
15791579
assertThat(head.get(head.fieldIndex("timestamp1"))).isEqualTo(timestamp1);
15801580
}
15811581

1582-
protected Dataset<Row> writeAndLoadDatasetOverwriteDynamicPartition(Dataset<Row> df) {
1582+
protected Dataset<Row> writeAndLoadDatasetOverwriteDynamicPartition(
1583+
Dataset<Row> df, boolean isPartitioned) {
15831584
df.write()
15841585
.format("bigquery")
15851586
.mode(SaveMode.Overwrite)
@@ -1591,6 +1592,13 @@ protected Dataset<Row> writeAndLoadDatasetOverwriteDynamicPartition(Dataset<Row>
15911592
.option("temporaryGcsBucket", TestConstants.TEMPORARY_GCS_BUCKET)
15921593
.save();
15931594

1595+
if (isPartitioned) {
1596+
IntegrationTestUtils.runQuery(
1597+
String.format(
1598+
"ALTER TABLE %s.%s SET OPTIONS (require_partition_filter = false)",
1599+
testDataset, testTable));
1600+
}
1601+
15941602
return spark
15951603
.read()
15961604
.format("bigquery")
@@ -1607,9 +1615,9 @@ public void testOverwriteDynamicPartition_partitionTimestampByHour() {
16071615
IntegrationTestUtils.runQuery(
16081616
String.format(
16091617
"CREATE TABLE `%s.%s` (%s INTEGER, %s TIMESTAMP) "
1610-
+ "PARTITION BY timestamp_trunc(order_date_time, HOUR) "
1618+
+ "PARTITION BY timestamp_trunc(order_date_time, HOUR) OPTIONS (require_partition_filter = true) "
16111619
+ "AS SELECT * FROM UNNEST([(1, TIMESTAMP '2023-09-28 1:00:00 UTC'), "
1612-
+ "(2, TIMESTAMP '2023-09-28 10:00:00 UTC'), (3, TIMESTAMP '2023-09-28 10:30:00 UTC')])",
1620+
+ "(2, TIMESTAMP '2023-09-28 10:00:00 UTC'), (3, TIMESTAMP '2023-09-28 10:30:00 UTC')]) ",
16131621
testDataset, testTable, orderId, orderDateTime));
16141622

16151623
Dataset<Row> df =
@@ -1621,7 +1629,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByHour() {
16211629
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
16221630
StructField.apply(orderDateTime, DataTypes.TimestampType, true, Metadata.empty())));
16231631

1624-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1632+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
16251633
assertThat(result.count()).isEqualTo(3);
16261634
List<Row> rows = result.collectAsList();
16271635
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1651,7 +1659,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByDay() {
16511659
IntegrationTestUtils.runQuery(
16521660
String.format(
16531661
"CREATE TABLE `%s.%s` (%s INTEGER, %s TIMESTAMP) "
1654-
+ "PARTITION BY DATE(order_date_time) "
1662+
+ "PARTITION BY DATE(order_date_time) OPTIONS (require_partition_filter = true) "
16551663
+ "AS SELECT * FROM UNNEST([(1, TIMESTAMP '2023-09-28 1:00:00 UTC'), "
16561664
+ "(2, TIMESTAMP '2023-09-29 10:00:00 UTC'), (3, TIMESTAMP '2023-09-29 17:00:00 UTC')])",
16571665
testDataset, testTable, orderId, orderDateTime));
@@ -1665,7 +1673,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByDay() {
16651673
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
16661674
StructField.apply(orderDateTime, DataTypes.TimestampType, true, Metadata.empty())));
16671675

1668-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1676+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
16691677
assertThat(result.count()).isEqualTo(3);
16701678
List<Row> rows = result.collectAsList();
16711679
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1695,7 +1703,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByMonth() {
16951703
IntegrationTestUtils.runQuery(
16961704
String.format(
16971705
"CREATE TABLE `%s.%s` (%s INTEGER, %s TIMESTAMP) "
1698-
+ "PARTITION BY timestamp_trunc(order_date_time, MONTH) "
1706+
+ "PARTITION BY timestamp_trunc(order_date_time, MONTH) OPTIONS (require_partition_filter = true) "
16991707
+ "AS SELECT * FROM UNNEST([(1, TIMESTAMP '2023-09-28 1:00:00 UTC'), "
17001708
+ "(2, TIMESTAMP '2023-10-20 10:00:00 UTC'), (3, TIMESTAMP '2023-10-25 12:00:00 UTC')])",
17011709
testDataset, testTable, orderId, orderDateTime));
@@ -1709,7 +1717,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByMonth() {
17091717
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
17101718
StructField.apply(orderDateTime, DataTypes.TimestampType, true, Metadata.empty())));
17111719

1712-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1720+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
17131721
assertThat(result.count()).isEqualTo(3);
17141722
List<Row> rows = result.collectAsList();
17151723
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1739,7 +1747,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByYear() {
17391747
IntegrationTestUtils.runQuery(
17401748
String.format(
17411749
"CREATE TABLE `%s.%s` (%s INTEGER, %s TIMESTAMP) "
1742-
+ "PARTITION BY timestamp_trunc(order_date_time, YEAR) "
1750+
+ "PARTITION BY timestamp_trunc(order_date_time, YEAR) OPTIONS (require_partition_filter = true) "
17431751
+ "AS SELECT * FROM UNNEST([(1, TIMESTAMP '2022-09-28 1:00:00 UTC'), "
17441752
+ "(2, TIMESTAMP '2023-10-20 10:00:00 UTC'), (2, TIMESTAMP '2023-10-25 12:00:00 UTC')])",
17451753
testDataset, testTable, orderId, orderDateTime));
@@ -1753,7 +1761,7 @@ public void testOverwriteDynamicPartition_partitionTimestampByYear() {
17531761
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
17541762
StructField.apply(orderDateTime, DataTypes.TimestampType, true, Metadata.empty())));
17551763

1756-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1764+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
17571765
assertThat(result.count()).isEqualTo(3);
17581766
List<Row> rows = result.collectAsList();
17591767
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1783,7 +1791,7 @@ public void testOverwriteDynamicPartition_partitionDateByDay() {
17831791
IntegrationTestUtils.runQuery(
17841792
String.format(
17851793
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATE) "
1786-
+ "PARTITION BY order_date "
1794+
+ "PARTITION BY order_date OPTIONS (require_partition_filter = true) "
17871795
+ "AS SELECT * FROM UNNEST([(1, DATE('2023-09-28')), (2, DATE('2023-09-29'))])",
17881796
testDataset, testTable, orderId, orderDate));
17891797

@@ -1796,7 +1804,7 @@ public void testOverwriteDynamicPartition_partitionDateByDay() {
17961804
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
17971805
StructField.apply(orderDate, DataTypes.DateType, true, Metadata.empty())));
17981806

1799-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1807+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
18001808
assertThat(result.count()).isEqualTo(3);
18011809
List<Row> rows = result.collectAsList();
18021810
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1823,7 +1831,7 @@ public void testOverwriteDynamicPartition_partitionDateByMonth() {
18231831
IntegrationTestUtils.runQuery(
18241832
String.format(
18251833
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATE) "
1826-
+ "PARTITION BY DATE_TRUNC(order_date, MONTH) "
1834+
+ "PARTITION BY DATE_TRUNC(order_date, MONTH) OPTIONS (require_partition_filter = true) "
18271835
+ "AS SELECT * FROM UNNEST([(1, DATE('2023-09-28')), "
18281836
+ "(2, DATE('2023-10-29')), (2, DATE('2023-10-28'))])",
18291837
testDataset, testTable, orderId, orderDate));
@@ -1837,7 +1845,7 @@ public void testOverwriteDynamicPartition_partitionDateByMonth() {
18371845
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
18381846
StructField.apply(orderDate, DataTypes.DateType, true, Metadata.empty())));
18391847

1840-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1848+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
18411849
assertThat(result.count()).isEqualTo(3);
18421850
List<Row> rows = result.collectAsList();
18431851
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1864,7 +1872,7 @@ public void testOverwriteDynamicPartition_partitionDateByYear() {
18641872
IntegrationTestUtils.runQuery(
18651873
String.format(
18661874
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATE) "
1867-
+ "PARTITION BY DATE_TRUNC(order_date, YEAR) "
1875+
+ "PARTITION BY DATE_TRUNC(order_date, YEAR) OPTIONS (require_partition_filter = true) "
18681876
+ "AS SELECT * FROM UNNEST([(1, DATE('2022-09-28')), "
18691877
+ "(2, DATE('2023-10-29')), (2, DATE('2023-11-28'))])",
18701878
testDataset, testTable, orderId, orderDate));
@@ -1878,7 +1886,7 @@ public void testOverwriteDynamicPartition_partitionDateByYear() {
18781886
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
18791887
StructField.apply(orderDate, DataTypes.DateType, true, Metadata.empty())));
18801888

1881-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1889+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
18821890
assertThat(result.count()).isEqualTo(3);
18831891
List<Row> rows = result.collectAsList();
18841892
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1906,7 +1914,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByHour() {
19061914
IntegrationTestUtils.runQuery(
19071915
String.format(
19081916
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATETIME) "
1909-
+ "PARTITION BY timestamp_trunc(order_date_time, HOUR) "
1917+
+ "PARTITION BY timestamp_trunc(order_date_time, HOUR) OPTIONS (require_partition_filter = true) "
19101918
+ "AS SELECT * FROM UNNEST([(1, DATETIME '2023-09-28 1:00:00'), "
19111919
+ "(2, DATETIME '2023-09-28 10:00:00'), (3, DATETIME '2023-09-28 10:30:00')])",
19121920
testDataset, testTable, orderId, orderDateTime));
@@ -1920,7 +1928,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByHour() {
19201928
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
19211929
StructField.apply(orderDateTime, timeStampNTZType.get(), true, Metadata.empty())));
19221930

1923-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1931+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
19241932
assertThat(result.count()).isEqualTo(3);
19251933
List<Row> rows = result.collectAsList();
19261934
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1948,7 +1956,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByDay() {
19481956
IntegrationTestUtils.runQuery(
19491957
String.format(
19501958
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATETIME) "
1951-
+ "PARTITION BY timestamp_trunc(order_date_time, DAY) "
1959+
+ "PARTITION BY timestamp_trunc(order_date_time, DAY) OPTIONS (require_partition_filter = true) "
19521960
+ "AS SELECT * FROM UNNEST([(1, DATETIME '2023-09-28 1:00:00'), "
19531961
+ "(2, DATETIME '2023-09-29 10:00:00'), (3, DATETIME '2023-09-29 17:30:00')])",
19541962
testDataset, testTable, orderId, orderDateTime));
@@ -1962,7 +1970,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByDay() {
19621970
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
19631971
StructField.apply(orderDateTime, timeStampNTZType.get(), true, Metadata.empty())));
19641972

1965-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
1973+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
19661974
assertThat(result.count()).isEqualTo(3);
19671975
List<Row> rows = result.collectAsList();
19681976
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -1990,7 +1998,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByMonth() {
19901998
IntegrationTestUtils.runQuery(
19911999
String.format(
19922000
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATETIME) "
1993-
+ "PARTITION BY timestamp_trunc(order_date_time, MONTH) "
2001+
+ "PARTITION BY timestamp_trunc(order_date_time, MONTH) OPTIONS (require_partition_filter = true) "
19942002
+ "AS SELECT * FROM UNNEST([(1, DATETIME '2023-09-28 1:00:00'), "
19952003
+ "(2, DATETIME '2023-10-29 10:00:00'), (3, DATETIME '2023-10-29 17:30:00')])",
19962004
testDataset, testTable, orderId, orderDateTime));
@@ -2004,7 +2012,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByMonth() {
20042012
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
20052013
StructField.apply(orderDateTime, timeStampNTZType.get(), true, Metadata.empty())));
20062014

2007-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2015+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
20082016
assertThat(result.count()).isEqualTo(3);
20092017
List<Row> rows = result.collectAsList();
20102018
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2032,7 +2040,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByYear() {
20322040
IntegrationTestUtils.runQuery(
20332041
String.format(
20342042
"CREATE TABLE `%s.%s` (%s INTEGER, %s DATETIME) "
2035-
+ "PARTITION BY timestamp_trunc(order_date_time, YEAR) "
2043+
+ "PARTITION BY timestamp_trunc(order_date_time, YEAR) OPTIONS (require_partition_filter = true) "
20362044
+ "AS SELECT * FROM UNNEST([(1, DATETIME '2022-09-28 1:00:00'), "
20372045
+ "(2, DATETIME '2023-10-29 10:00:00'), (3, DATETIME '2023-11-29 17:30:00')])",
20382046
testDataset, testTable, orderId, orderDateTime));
@@ -2046,7 +2054,7 @@ public void testOverwriteDynamicPartition_partitionDateTimeByYear() {
20462054
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
20472055
StructField.apply(orderDateTime, timeStampNTZType.get(), true, Metadata.empty())));
20482056

2049-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2057+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
20502058
assertThat(result.count()).isEqualTo(3);
20512059
List<Row> rows = result.collectAsList();
20522060
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2086,7 +2094,7 @@ public void testOverwriteDynamicPartition_noTimePartitioning() {
20862094
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
20872095
StructField.apply(orderDateTime, DataTypes.TimestampType, true, Metadata.empty())));
20882096

2089-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2097+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, false);
20902098
assertThat(result.count()).isEqualTo(2);
20912099
List<Row> rows = result.collectAsList();
20922100
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2127,7 +2135,7 @@ public void testOverwriteDynamicPartition_rangePartitioned() {
21272135
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
21282136
StructField.apply(orderCount, DataTypes.IntegerType, true, Metadata.empty())));
21292137

2130-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2138+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
21312139
assertThat(result.count()).isEqualTo(5);
21322140
List<Row> rows = result.collectAsList();
21332141
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2174,7 +2182,7 @@ public void testOverwriteDynamicPartition_rangePartitionedOutsideRangeLessThanSt
21742182
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
21752183
StructField.apply(orderCount, DataTypes.IntegerType, true, Metadata.empty())));
21762184

2177-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2185+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
21782186
assertThat(result.count()).isEqualTo(2);
21792187
List<Row> rows = result.collectAsList();
21802188
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2209,7 +2217,7 @@ public void testOverwriteDynamicPartition_rangePartitionedOutsideRangeGreaterTha
22092217
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
22102218
StructField.apply(orderCount, DataTypes.IntegerType, true, Metadata.empty())));
22112219

2212-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2220+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
22132221
assertThat(result.count()).isEqualTo(2);
22142222
List<Row> rows = result.collectAsList();
22152223
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2244,7 +2252,7 @@ public void testOverwriteDynamicPartition_rangePartitionedBoundaryCondition() {
22442252
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
22452253
StructField.apply(orderCount, DataTypes.IntegerType, true, Metadata.empty())));
22462254

2247-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2255+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
22482256
assertThat(result.count()).isEqualTo(3);
22492257
List<Row> rows = result.collectAsList();
22502258
rows.sort(Comparator.comparing(row -> row.getLong(row.fieldIndex(orderId))));
@@ -2283,7 +2291,7 @@ public void testOverwriteDynamicPartition_rangePartitionedWithNulls() {
22832291
StructField.apply(orderId, DataTypes.IntegerType, true, Metadata.empty()),
22842292
StructField.apply(orderCount, DataTypes.IntegerType, true, Metadata.empty())));
22852293

2286-
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df);
2294+
Dataset<Row> result = writeAndLoadDatasetOverwriteDynamicPartition(df, true);
22872295
assertThat(result.count()).isEqualTo(3);
22882296

22892297
List<Row> rows = result.collectAsList();

0 commit comments

Comments
 (0)