Skip to content

Commit 9481c19

Browse files
authored
[lake/paimon] Prohibit users from setting Paimon properties that Fluss depends on (apache#1826)
1 parent 62666f3 commit 9481c19

File tree

3 files changed

+174
-108
lines changed

3 files changed

+174
-108
lines changed

fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java

Lines changed: 5 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import org.apache.fluss.annotation.VisibleForTesting;
2121
import org.apache.fluss.config.Configuration;
22-
import org.apache.fluss.exception.InvalidTableException;
2322
import org.apache.fluss.exception.TableAlreadyExistException;
2423
import org.apache.fluss.exception.TableNotExistException;
2524
import org.apache.fluss.lake.lakestorage.LakeCatalog;
@@ -28,7 +27,6 @@
2827
import org.apache.fluss.metadata.TablePath;
2928
import org.apache.fluss.utils.IOUtils;
3029

31-
import org.apache.paimon.CoreOptions;
3230
import org.apache.paimon.catalog.Catalog;
3331
import org.apache.paimon.catalog.CatalogContext;
3432
import org.apache.paimon.catalog.CatalogFactory;
@@ -41,8 +39,9 @@
4139

4240
import java.util.LinkedHashMap;
4341
import java.util.List;
44-
import java.util.Map;
4542

43+
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimon;
44+
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchema;
4645
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchemaChanges;
4746
import static org.apache.fluss.metadata.TableDescriptor.BUCKET_COLUMN_NAME;
4847
import static org.apache.fluss.metadata.TableDescriptor.OFFSET_COLUMN_NAME;
@@ -66,11 +65,6 @@ public class PaimonLakeCatalog implements LakeCatalog {
6665

6766
private final Catalog paimonCatalog;
6867

69-
// for fluss config
70-
private static final String FLUSS_CONF_PREFIX = "fluss.";
71-
// for paimon config
72-
private static final String PAIMON_CONF_PREFIX = "paimon.";
73-
7468
public PaimonLakeCatalog(Configuration configuration) {
7569
this.paimonCatalog =
7670
CatalogFactory.createCatalog(
@@ -86,7 +80,7 @@ protected Catalog getPaimonCatalog() {
8680
public void createTable(TablePath tablePath, TableDescriptor tableDescriptor)
8781
throws TableAlreadyExistException {
8882
// then, create the table
89-
Identifier paimonPath = toPaimonIdentifier(tablePath);
83+
Identifier paimonPath = toPaimon(tablePath);
9084
Schema paimonSchema = toPaimonSchema(tableDescriptor);
9185
try {
9286
createTable(paimonPath, paimonSchema);
@@ -111,9 +105,8 @@ public void createTable(TablePath tablePath, TableDescriptor tableDescriptor)
111105
public void alterTable(TablePath tablePath, List<TableChange> tableChanges)
112106
throws TableNotExistException {
113107
try {
114-
Identifier paimonPath = toPaimonIdentifier(tablePath);
115-
List<SchemaChange> paimonSchemaChanges =
116-
toPaimonSchemaChanges(tableChanges, this::getFlussPropertyKeyToPaimon);
108+
Identifier paimonPath = toPaimon(tablePath);
109+
List<SchemaChange> paimonSchemaChanges = toPaimonSchemaChanges(tableChanges);
117110
alterTable(paimonPath, paimonSchemaChanges);
118111
} catch (Catalog.ColumnAlreadyExistException | Catalog.ColumnNotExistException e) {
119112
// shouldn't happen before we support schema change
@@ -149,97 +142,6 @@ private void alterTable(Identifier tablePath, List<SchemaChange> tableChanges)
149142
}
150143
}
151144

152-
private Identifier toPaimonIdentifier(TablePath tablePath) {
153-
return Identifier.create(tablePath.getDatabaseName(), tablePath.getTableName());
154-
}
155-
156-
private Schema toPaimonSchema(TableDescriptor tableDescriptor) {
157-
Schema.Builder schemaBuilder = Schema.newBuilder();
158-
Options options = new Options();
159-
160-
// set default properties
161-
setPaimonDefaultProperties(options);
162-
163-
// When bucket key is undefined, it should use dynamic bucket (bucket = -1) mode.
164-
List<String> bucketKeys = tableDescriptor.getBucketKeys();
165-
if (!bucketKeys.isEmpty()) {
166-
int numBuckets =
167-
tableDescriptor
168-
.getTableDistribution()
169-
.flatMap(TableDescriptor.TableDistribution::getBucketCount)
170-
.orElseThrow(
171-
() ->
172-
new IllegalArgumentException(
173-
"Bucket count should be set."));
174-
options.set(CoreOptions.BUCKET, numBuckets);
175-
options.set(CoreOptions.BUCKET_KEY, String.join(",", bucketKeys));
176-
} else {
177-
options.set(CoreOptions.BUCKET, CoreOptions.BUCKET.defaultValue());
178-
}
179-
180-
// set schema
181-
for (org.apache.fluss.metadata.Schema.Column column :
182-
tableDescriptor.getSchema().getColumns()) {
183-
String columnName = column.getName();
184-
if (SYSTEM_COLUMNS.containsKey(columnName)) {
185-
throw new InvalidTableException(
186-
"Column "
187-
+ columnName
188-
+ " conflicts with a system column name of paimon table, please rename the column.");
189-
}
190-
schemaBuilder.column(
191-
columnName,
192-
column.getDataType().accept(FlussDataTypeToPaimonDataType.INSTANCE),
193-
column.getComment().orElse(null));
194-
}
195-
196-
// add system metadata columns to schema
197-
for (Map.Entry<String, DataType> systemColumn : SYSTEM_COLUMNS.entrySet()) {
198-
schemaBuilder.column(systemColumn.getKey(), systemColumn.getValue());
199-
}
200-
201-
// set pk
202-
if (tableDescriptor.hasPrimaryKey()) {
203-
schemaBuilder.primaryKey(
204-
tableDescriptor.getSchema().getPrimaryKey().get().getColumnNames());
205-
options.set(
206-
CoreOptions.CHANGELOG_PRODUCER.key(),
207-
CoreOptions.ChangelogProducer.INPUT.toString());
208-
}
209-
// set partition keys
210-
schemaBuilder.partitionKeys(tableDescriptor.getPartitionKeys());
211-
212-
// set properties to paimon schema
213-
tableDescriptor.getProperties().forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
214-
tableDescriptor
215-
.getCustomProperties()
216-
.forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
217-
schemaBuilder.options(options.toMap());
218-
return schemaBuilder.build();
219-
}
220-
221-
private void setPaimonDefaultProperties(Options options) {
222-
// set partition.legacy-name to false, otherwise paimon will use toString for all types,
223-
// which will cause inconsistent partition value for a same binary value
224-
options.set(CoreOptions.PARTITION_GENERATE_LEGCY_NAME, false);
225-
}
226-
227-
private void setFlussPropertyToPaimon(String key, String value, Options options) {
228-
if (key.startsWith(PAIMON_CONF_PREFIX)) {
229-
options.set(key.substring(PAIMON_CONF_PREFIX.length()), value);
230-
} else {
231-
options.set(FLUSS_CONF_PREFIX + key, value);
232-
}
233-
}
234-
235-
private String getFlussPropertyKeyToPaimon(String key) {
236-
if (key.startsWith(PAIMON_CONF_PREFIX)) {
237-
return key.substring(PAIMON_CONF_PREFIX.length());
238-
} else {
239-
return FLUSS_CONF_PREFIX + key;
240-
}
241-
}
242-
243145
@Override
244146
public void close() {
245147
IOUtils.closeQuietly(paimonCatalog, "paimon catalog");

fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/utils/PaimonConversions.java

Lines changed: 138 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,52 @@
1717

1818
package org.apache.fluss.lake.paimon.utils;
1919

20+
import org.apache.fluss.annotation.VisibleForTesting;
21+
import org.apache.fluss.exception.InvalidConfigException;
22+
import org.apache.fluss.exception.InvalidTableException;
23+
import org.apache.fluss.lake.paimon.FlussDataTypeToPaimonDataType;
2024
import org.apache.fluss.lake.paimon.source.FlussRowAsPaimonRow;
2125
import org.apache.fluss.metadata.TableChange;
26+
import org.apache.fluss.metadata.TableDescriptor;
2227
import org.apache.fluss.metadata.TablePath;
2328
import org.apache.fluss.record.ChangeType;
2429
import org.apache.fluss.row.GenericRow;
2530
import org.apache.fluss.row.InternalRow;
2631

32+
import org.apache.paimon.CoreOptions;
2733
import org.apache.paimon.catalog.Identifier;
34+
import org.apache.paimon.options.Options;
35+
import org.apache.paimon.schema.Schema;
2836
import org.apache.paimon.schema.SchemaChange;
2937
import org.apache.paimon.types.DataType;
3038
import org.apache.paimon.types.RowKind;
3139
import org.apache.paimon.types.RowType;
3240

3341
import java.util.ArrayList;
42+
import java.util.HashSet;
3443
import java.util.List;
35-
import java.util.function.Function;
44+
import java.util.Map;
45+
import java.util.Set;
46+
47+
import static org.apache.fluss.lake.paimon.PaimonLakeCatalog.SYSTEM_COLUMNS;
3648

3749
/** Utils for conversion between Paimon and Fluss. */
3850
public class PaimonConversions {
3951

52+
// for fluss config
53+
private static final String FLUSS_CONF_PREFIX = "fluss.";
54+
// for paimon config
55+
private static final String PAIMON_CONF_PREFIX = "paimon.";
56+
57+
/** Paimon config options set by Fluss should not be set by users. */
58+
@VisibleForTesting public static final Set<String> PAIMON_UNSETTABLE_OPTIONS = new HashSet<>();
59+
60+
static {
61+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.BUCKET.key());
62+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.BUCKET_KEY.key());
63+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.PARTITION_GENERATE_LEGCY_NAME.key());
64+
}
65+
4066
public static RowKind toRowKind(ChangeType changeType) {
4167
switch (changeType) {
4268
case APPEND_ONLY:
@@ -80,22 +106,21 @@ public static Object toPaimonLiteral(DataType dataType, Object flussLiteral) {
80106
.getFieldOrNull(flussRowAsPaimonRow);
81107
}
82108

83-
public static List<SchemaChange> toPaimonSchemaChanges(
84-
List<TableChange> tableChanges, Function<String, String> optionKeyTransformer) {
109+
public static List<SchemaChange> toPaimonSchemaChanges(List<TableChange> tableChanges) {
85110
List<SchemaChange> schemaChanges = new ArrayList<>(tableChanges.size());
86111

87112
for (TableChange tableChange : tableChanges) {
88113
if (tableChange instanceof TableChange.SetOption) {
89114
TableChange.SetOption setOption = (TableChange.SetOption) tableChange;
90115
schemaChanges.add(
91116
SchemaChange.setOption(
92-
optionKeyTransformer.apply(setOption.getKey()),
117+
convertFlussPropertyKeyToPaimon(setOption.getKey()),
93118
setOption.getValue()));
94119
} else if (tableChange instanceof TableChange.ResetOption) {
95120
TableChange.ResetOption resetOption = (TableChange.ResetOption) tableChange;
96121
schemaChanges.add(
97122
SchemaChange.removeOption(
98-
optionKeyTransformer.apply(resetOption.getKey())));
123+
convertFlussPropertyKeyToPaimon(resetOption.getKey())));
99124
} else {
100125
throw new UnsupportedOperationException(
101126
"Unsupported table change: " + tableChange.getClass());
@@ -104,4 +129,112 @@ public static List<SchemaChange> toPaimonSchemaChanges(
104129

105130
return schemaChanges;
106131
}
132+
133+
public static Schema toPaimonSchema(TableDescriptor tableDescriptor) {
134+
// validate paimon options first
135+
validatePaimonOptions(tableDescriptor.getProperties());
136+
validatePaimonOptions(tableDescriptor.getCustomProperties());
137+
138+
Schema.Builder schemaBuilder = Schema.newBuilder();
139+
Options options = new Options();
140+
141+
// set default properties
142+
setPaimonDefaultProperties(options);
143+
144+
// When bucket key is undefined, it should use dynamic bucket (bucket = -1) mode.
145+
List<String> bucketKeys = tableDescriptor.getBucketKeys();
146+
if (!bucketKeys.isEmpty()) {
147+
int numBuckets =
148+
tableDescriptor
149+
.getTableDistribution()
150+
.flatMap(TableDescriptor.TableDistribution::getBucketCount)
151+
.orElseThrow(
152+
() ->
153+
new IllegalArgumentException(
154+
"Bucket count should be set."));
155+
options.set(CoreOptions.BUCKET, numBuckets);
156+
options.set(CoreOptions.BUCKET_KEY, String.join(",", bucketKeys));
157+
} else {
158+
options.set(CoreOptions.BUCKET, CoreOptions.BUCKET.defaultValue());
159+
}
160+
161+
// set schema
162+
for (org.apache.fluss.metadata.Schema.Column column :
163+
tableDescriptor.getSchema().getColumns()) {
164+
String columnName = column.getName();
165+
if (SYSTEM_COLUMNS.containsKey(columnName)) {
166+
throw new InvalidTableException(
167+
"Column "
168+
+ columnName
169+
+ " conflicts with a system column name of paimon table, please rename the column.");
170+
}
171+
schemaBuilder.column(
172+
columnName,
173+
column.getDataType().accept(FlussDataTypeToPaimonDataType.INSTANCE),
174+
column.getComment().orElse(null));
175+
}
176+
177+
// add system metadata columns to schema
178+
for (Map.Entry<String, DataType> systemColumn : SYSTEM_COLUMNS.entrySet()) {
179+
schemaBuilder.column(systemColumn.getKey(), systemColumn.getValue());
180+
}
181+
182+
// set pk
183+
if (tableDescriptor.hasPrimaryKey()) {
184+
schemaBuilder.primaryKey(
185+
tableDescriptor.getSchema().getPrimaryKey().get().getColumnNames());
186+
options.set(
187+
CoreOptions.CHANGELOG_PRODUCER.key(),
188+
CoreOptions.ChangelogProducer.INPUT.toString());
189+
}
190+
// set partition keys
191+
schemaBuilder.partitionKeys(tableDescriptor.getPartitionKeys());
192+
193+
// set properties to paimon schema
194+
tableDescriptor.getProperties().forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
195+
tableDescriptor
196+
.getCustomProperties()
197+
.forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
198+
schemaBuilder.options(options.toMap());
199+
return schemaBuilder.build();
200+
}
201+
202+
private static void validatePaimonOptions(Map<String, String> properties) {
203+
properties.forEach(
204+
(k, v) -> {
205+
String paimonKey = k;
206+
if (k.startsWith(PAIMON_CONF_PREFIX)) {
207+
paimonKey = k.substring(PAIMON_CONF_PREFIX.length());
208+
}
209+
if (PAIMON_UNSETTABLE_OPTIONS.contains(paimonKey)) {
210+
throw new InvalidConfigException(
211+
String.format(
212+
"The Paimon option %s will be set automatically by Fluss "
213+
+ "and should not be set manually.",
214+
k));
215+
}
216+
});
217+
}
218+
219+
private static void setPaimonDefaultProperties(Options options) {
220+
// set partition.legacy-name to false, otherwise paimon will use toString for all types,
221+
// which will cause inconsistent partition value for the same binary value
222+
options.set(CoreOptions.PARTITION_GENERATE_LEGCY_NAME, false);
223+
}
224+
225+
private static void setFlussPropertyToPaimon(String key, String value, Options options) {
226+
if (key.startsWith(PAIMON_CONF_PREFIX)) {
227+
options.set(key.substring(PAIMON_CONF_PREFIX.length()), value);
228+
} else {
229+
options.set(FLUSS_CONF_PREFIX + key, value);
230+
}
231+
}
232+
233+
private static String convertFlussPropertyKeyToPaimon(String key) {
234+
if (key.startsWith(PAIMON_CONF_PREFIX)) {
235+
return key.substring(PAIMON_CONF_PREFIX.length());
236+
} else {
237+
return FLUSS_CONF_PREFIX + key;
238+
}
239+
}
107240
}

fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/LakeEnabledTableCreateITCase.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.util.Map;
5858
import java.util.stream.Stream;
5959

60+
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.PAIMON_UNSETTABLE_OPTIONS;
6061
import static org.apache.fluss.metadata.TableDescriptor.BUCKET_COLUMN_NAME;
6162
import static org.apache.fluss.metadata.TableDescriptor.OFFSET_COLUMN_NAME;
6263
import static org.apache.fluss.metadata.TableDescriptor.TIMESTAMP_COLUMN_NAME;
@@ -370,6 +371,36 @@ void testCreateLakeEnabledTableWithAllTypes() throws Exception {
370371
BUCKET_NUM);
371372
}
372373

374+
@Test
375+
void testCreateLakeEnableTableWithUnsettablePaimonOptions() {
376+
Map<String, String> customProperties = new HashMap<>();
377+
378+
for (String key : PAIMON_UNSETTABLE_OPTIONS) {
379+
customProperties.clear();
380+
customProperties.put(key, "v");
381+
382+
TableDescriptor table =
383+
TableDescriptor.builder()
384+
.schema(
385+
Schema.newBuilder()
386+
.column("c1", DataTypes.INT())
387+
.column("c2", DataTypes.STRING())
388+
.build())
389+
.property(ConfigOptions.TABLE_DATALAKE_ENABLED, true)
390+
.customProperties(customProperties)
391+
.distributedBy(BUCKET_NUM, "c1", "c2")
392+
.build();
393+
TablePath tablePath = TablePath.of(DATABASE, "table_unsettable_paimon_option");
394+
assertThatThrownBy(() -> admin.createTable(tablePath, table, false).get())
395+
.cause()
396+
.isInstanceOf(InvalidConfigException.class)
397+
.hasMessage(
398+
String.format(
399+
"The Paimon option %s will be set automatically by Fluss and should not be set manually.",
400+
key));
401+
}
402+
}
403+
373404
@Test
374405
void testAlterLakeEnabledLogTable() throws Exception {
375406
Map<String, String> customProperties = new HashMap<>();

0 commit comments

Comments
 (0)