-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Spark: SnapshotTableSparkAction add validation for non-overlapping source/dest table paths. #12779
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
7a0dee5
7d4bccd
5b26247
3f14f7f
f86befe
2a7e435
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,12 +19,16 @@ | |
package org.apache.iceberg.spark.actions; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
import static org.assertj.core.api.Assertions.assertThatThrownBy; | ||
import static org.junit.jupiter.api.Assumptions.assumeTrue; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.util.Map; | ||
import java.util.concurrent.Executors; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import org.apache.iceberg.ParameterizedTestExtension; | ||
import org.apache.iceberg.relocated.com.google.common.collect.Maps; | ||
import org.apache.iceberg.spark.CatalogTestBase; | ||
import org.junit.jupiter.api.AfterEach; | ||
import org.junit.jupiter.api.TestTemplate; | ||
|
@@ -65,4 +69,39 @@ public void testSnapshotWithParallelTasks() throws IOException { | |
.execute(); | ||
assertThat(snapshotThreadsIndex.get()).isEqualTo(2); | ||
} | ||
|
||
@TestTemplate | ||
public void testTableLocationOverlapThrowsException() throws IOException { | ||
// Ensure the test runs only for non-Hadoop-based catalogs, | ||
// because path-based tables cannot have a custom location set. | ||
assumeTrue( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for your suggestion! I have improved this part of the code. |
||
!catalogName.equals("testhadoop"), "Cannot set a custom location for a path-based table."); | ||
|
||
String location = Files.createTempDirectory(temp, "junit").toFile().toString(); | ||
|
||
sql( | ||
"CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'", | ||
SOURCE_NAME, location); | ||
sql("INSERT INTO TABLE %s VALUES (1, 'a')", SOURCE_NAME); | ||
sql("INSERT INTO TABLE %s VALUES (2, 'b')", SOURCE_NAME); | ||
|
||
// Define properties for the destination table, setting its location to the same path as the | ||
// source table | ||
Map<String, String> tableProperties = Maps.newHashMap(); | ||
tableProperties.put("location", "file:" + location); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We make it single line: Map<String, String> tableProperties = Map.of("location", "file:" + location); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I concur. This is much cleaner. |
||
|
||
// Test that an exception is thrown | ||
// when the destination table location overlaps with the source table location | ||
// Assert that the exception message matches the expected error message | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I would remove this comment. This is obvious when reading the following code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I concur with this comment as well. Given that in other places in the code base where the caching of IllegalArgumentException is being tested doesn't have any explanation in the comments earthier I don't think the comment is needed here as well. It wouldn't be a divergence from rest of the codebase. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for the suggestion. I have removed this part of the comment. |
||
assertThatThrownBy( | ||
() -> | ||
SparkActions.get() | ||
.snapshotTable(SOURCE_NAME) | ||
.as(tableName) | ||
.tableProperties(tableProperties) | ||
.execute()) | ||
.isInstanceOf(IllegalArgumentException.class) | ||
.hasMessageContaining( | ||
"The destination table location overlaps with the source table location."); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The following
.
can be removed. Most existing error messages don't end with.
unless they have several sentences.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you decide to remove the full stop, don't forget to remove it from the test as well so that it will not fail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for helping review the code! I have made improvements to this part of the code.