Skip to content

Commit efab033

Browse files
committed
[core] Allow pk-clustering-override without explicit DV for first-row merge engine
First-row merge engine already has built-in deletion vector semantics, so requiring users to explicitly enable deletion-vectors is unnecessary.
1 parent 72600f9 commit efab033

3 files changed

Lines changed: 53 additions & 3 deletions

File tree

docs/content/primary-key-table/pk-clustering-override.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,23 @@ CREATE TABLE my_table (
5050
);
5151
```
5252

53+
For the `first-row` merge engine, deletion vectors are built in, so you do not need to enable them explicitly:
54+
55+
```sql
56+
CREATE TABLE my_table (
57+
id BIGINT,
58+
dt STRING,
59+
city STRING,
60+
amount DOUBLE,
61+
PRIMARY KEY (id) NOT ENFORCED
62+
) WITH (
63+
'pk-clustering-override' = 'true',
64+
'clustering.columns' = 'city',
65+
'merge-engine' = 'first-row',
66+
'bucket' = '4'
67+
);
68+
```
69+
5370
After this, data files within each bucket will be physically sorted by `city` instead of `id`. Queries like
5471
`SELECT * FROM my_table WHERE city = 'Beijing'` can skip irrelevant data files by checking their min/max statistics
5572
on the clustering column.
@@ -60,7 +77,7 @@ on the clustering column.
6077
|--------|-------------|
6178
| `pk-clustering-override` | `true` |
6279
| `clustering.columns` | Must be set (one or more non-primary-key columns) |
63-
| `deletion-vectors.enabled` | Must be `true` |
80+
| `deletion-vectors.enabled` | Must be `true` (not required for the `first-row` merge engine) |
6481
| `merge-engine` | `deduplicate` (default) or `first-row` only |
6582

6683
## When to Use

paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ private static void validateForDeletionVectors(CoreOptions options) {
522522
|| options.changelogProducer() == ChangelogProducer.LOOKUP,
523523
"Deletion vectors mode is only supported for NONE/INPUT/LOOKUP changelog producer now.");
524524

525-
// pk-clustering-override mode requires deletion vectors even for first-row
525+
// first-row is only allowed together with deletion vectors in pk-clustering-override mode
526526
if (!options.pkClusteringOverride()) {
527527
checkArgument(
528528
!options.mergeEngine().equals(MergeEngine.FIRST_ROW),
@@ -847,7 +847,8 @@ public static void validatePkClusteringOverride(CoreOptions options) {
847847
throw new IllegalArgumentException(
848848
"Cannot support 'pk-clustering-override' mode without 'clustering.columns'.");
849849
}
850-
if (!options.deletionVectorsEnabled()) {
850+
if (!options.deletionVectorsEnabled()
851+
&& options.mergeEngine() != CoreOptions.MergeEngine.FIRST_ROW) {
851852
throw new UnsupportedOperationException(
852853
"Cannot support deletion-vectors disabled in 'pk-clustering-override' mode.");
853854
}

paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,22 @@ public void testFirstRowBasic() throws Exception {
519519
.containsExactlyInAnyOrder(GenericRow.of(1, 100), GenericRow.of(2, 200));
520520
}
521521

522+
/** Test first-row mode without explicitly enabling deletion vectors. */
523+
@Test
524+
public void testFirstRowWithoutDeletionVectors() throws Exception {
525+
Table firstRowTable = createFirstRowTableWithoutDv();
526+
527+
// Write initial data
528+
writeRows(firstRowTable, Arrays.asList(GenericRow.of(1, 100), GenericRow.of(2, 200)));
529+
530+
// Write same keys with different values - should be ignored (first-row keeps first)
531+
writeRows(firstRowTable, Arrays.asList(GenericRow.of(1, 999), GenericRow.of(2, 888)));
532+
533+
// Should still see the first values
534+
assertThat(readRows(firstRowTable))
535+
.containsExactlyInAnyOrder(GenericRow.of(1, 100), GenericRow.of(2, 200));
536+
}
537+
522538
/** Test first-row mode with multiple commits. */
523539
@Test
524540
public void testFirstRowMultipleCommits() throws Exception {
@@ -915,6 +931,22 @@ private Table createFirstRowTable() throws Exception {
915931
return catalog.getTable(identifier);
916932
}
917933

934+
private Table createFirstRowTableWithoutDv() throws Exception {
935+
Identifier identifier = Identifier.create("default", "first_row_no_dv_table");
936+
Schema schema =
937+
Schema.newBuilder()
938+
.column("a", DataTypes.INT())
939+
.column("b", DataTypes.INT())
940+
.primaryKey("a")
941+
.option(BUCKET.key(), "1")
942+
.option(CLUSTERING_COLUMNS.key(), "b")
943+
.option(PK_CLUSTERING_OVERRIDE.key(), "true")
944+
.option(MERGE_ENGINE.key(), "first-row")
945+
.build();
946+
catalog.createTable(identifier, schema, false);
947+
return catalog.getTable(identifier);
948+
}
949+
918950
private void writeRows(List<GenericRow> rows) throws Exception {
919951
writeRows(table, rows);
920952
}

0 commit comments

Comments
 (0)