Skip to content

Commit 083ab22

Browse files
committed
[lake/paimon] Prohibit users from setting Paimon properties that Fluss depends on
1 parent ec14e1b commit 083ab22

File tree

3 files changed

+185
-108
lines changed

3 files changed

+185
-108
lines changed

fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/PaimonLakeCatalog.java

Lines changed: 5 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import org.apache.fluss.annotation.VisibleForTesting;
2121
import org.apache.fluss.config.Configuration;
22-
import org.apache.fluss.exception.InvalidTableException;
2322
import org.apache.fluss.exception.TableAlreadyExistException;
2423
import org.apache.fluss.exception.TableNotExistException;
2524
import org.apache.fluss.lake.lakestorage.LakeCatalog;
@@ -28,7 +27,6 @@
2827
import org.apache.fluss.metadata.TablePath;
2928
import org.apache.fluss.utils.IOUtils;
3029

31-
import org.apache.paimon.CoreOptions;
3230
import org.apache.paimon.catalog.Catalog;
3331
import org.apache.paimon.catalog.CatalogContext;
3432
import org.apache.paimon.catalog.CatalogFactory;
@@ -41,8 +39,9 @@
4139

4240
import java.util.LinkedHashMap;
4341
import java.util.List;
44-
import java.util.Map;
4542

43+
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimon;
44+
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchema;
4645
import static org.apache.fluss.lake.paimon.utils.PaimonConversions.toPaimonSchemaChanges;
4746
import static org.apache.fluss.metadata.TableDescriptor.BUCKET_COLUMN_NAME;
4847
import static org.apache.fluss.metadata.TableDescriptor.OFFSET_COLUMN_NAME;
@@ -66,11 +65,6 @@ public class PaimonLakeCatalog implements LakeCatalog {
6665

6766
private final Catalog paimonCatalog;
6867

69-
// for fluss config
70-
private static final String FLUSS_CONF_PREFIX = "fluss.";
71-
// for paimon config
72-
private static final String PAIMON_CONF_PREFIX = "paimon.";
73-
7468
public PaimonLakeCatalog(Configuration configuration) {
7569
this.paimonCatalog =
7670
CatalogFactory.createCatalog(
@@ -86,7 +80,7 @@ protected Catalog getPaimonCatalog() {
8680
public void createTable(TablePath tablePath, TableDescriptor tableDescriptor)
8781
throws TableAlreadyExistException {
8882
// then, create the table
89-
Identifier paimonPath = toPaimonIdentifier(tablePath);
83+
Identifier paimonPath = toPaimon(tablePath);
9084
Schema paimonSchema = toPaimonSchema(tableDescriptor);
9185
try {
9286
createTable(paimonPath, paimonSchema);
@@ -111,9 +105,8 @@ public void createTable(TablePath tablePath, TableDescriptor tableDescriptor)
111105
public void alterTable(TablePath tablePath, List<TableChange> tableChanges)
112106
throws TableNotExistException {
113107
try {
114-
Identifier paimonPath = toPaimonIdentifier(tablePath);
115-
List<SchemaChange> paimonSchemaChanges =
116-
toPaimonSchemaChanges(tableChanges, this::getFlussPropertyKeyToPaimon);
108+
Identifier paimonPath = toPaimon(tablePath);
109+
List<SchemaChange> paimonSchemaChanges = toPaimonSchemaChanges(tableChanges);
117110
alterTable(paimonPath, paimonSchemaChanges);
118111
} catch (Catalog.ColumnAlreadyExistException | Catalog.ColumnNotExistException e) {
119112
// shouldn't happen before we support schema change
@@ -149,97 +142,6 @@ private void alterTable(Identifier tablePath, List<SchemaChange> tableChanges)
149142
}
150143
}
151144

152-
private Identifier toPaimonIdentifier(TablePath tablePath) {
153-
return Identifier.create(tablePath.getDatabaseName(), tablePath.getTableName());
154-
}
155-
156-
private Schema toPaimonSchema(TableDescriptor tableDescriptor) {
157-
Schema.Builder schemaBuilder = Schema.newBuilder();
158-
Options options = new Options();
159-
160-
// set default properties
161-
setPaimonDefaultProperties(options);
162-
163-
// When bucket key is undefined, it should use dynamic bucket (bucket = -1) mode.
164-
List<String> bucketKeys = tableDescriptor.getBucketKeys();
165-
if (!bucketKeys.isEmpty()) {
166-
int numBuckets =
167-
tableDescriptor
168-
.getTableDistribution()
169-
.flatMap(TableDescriptor.TableDistribution::getBucketCount)
170-
.orElseThrow(
171-
() ->
172-
new IllegalArgumentException(
173-
"Bucket count should be set."));
174-
options.set(CoreOptions.BUCKET, numBuckets);
175-
options.set(CoreOptions.BUCKET_KEY, String.join(",", bucketKeys));
176-
} else {
177-
options.set(CoreOptions.BUCKET, CoreOptions.BUCKET.defaultValue());
178-
}
179-
180-
// set schema
181-
for (org.apache.fluss.metadata.Schema.Column column :
182-
tableDescriptor.getSchema().getColumns()) {
183-
String columnName = column.getName();
184-
if (SYSTEM_COLUMNS.containsKey(columnName)) {
185-
throw new InvalidTableException(
186-
"Column "
187-
+ columnName
188-
+ " conflicts with a system column name of paimon table, please rename the column.");
189-
}
190-
schemaBuilder.column(
191-
columnName,
192-
column.getDataType().accept(FlussDataTypeToPaimonDataType.INSTANCE),
193-
column.getComment().orElse(null));
194-
}
195-
196-
// add system metadata columns to schema
197-
for (Map.Entry<String, DataType> systemColumn : SYSTEM_COLUMNS.entrySet()) {
198-
schemaBuilder.column(systemColumn.getKey(), systemColumn.getValue());
199-
}
200-
201-
// set pk
202-
if (tableDescriptor.hasPrimaryKey()) {
203-
schemaBuilder.primaryKey(
204-
tableDescriptor.getSchema().getPrimaryKey().get().getColumnNames());
205-
options.set(
206-
CoreOptions.CHANGELOG_PRODUCER.key(),
207-
CoreOptions.ChangelogProducer.INPUT.toString());
208-
}
209-
// set partition keys
210-
schemaBuilder.partitionKeys(tableDescriptor.getPartitionKeys());
211-
212-
// set properties to paimon schema
213-
tableDescriptor.getProperties().forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
214-
tableDescriptor
215-
.getCustomProperties()
216-
.forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
217-
schemaBuilder.options(options.toMap());
218-
return schemaBuilder.build();
219-
}
220-
221-
private void setPaimonDefaultProperties(Options options) {
222-
// set partition.legacy-name to false, otherwise paimon will use toString for all types,
223-
// which will cause inconsistent partition value for a same binary value
224-
options.set(CoreOptions.PARTITION_GENERATE_LEGCY_NAME, false);
225-
}
226-
227-
private void setFlussPropertyToPaimon(String key, String value, Options options) {
228-
if (key.startsWith(PAIMON_CONF_PREFIX)) {
229-
options.set(key.substring(PAIMON_CONF_PREFIX.length()), value);
230-
} else {
231-
options.set(FLUSS_CONF_PREFIX + key, value);
232-
}
233-
}
234-
235-
private String getFlussPropertyKeyToPaimon(String key) {
236-
if (key.startsWith(PAIMON_CONF_PREFIX)) {
237-
return key.substring(PAIMON_CONF_PREFIX.length());
238-
} else {
239-
return FLUSS_CONF_PREFIX + key;
240-
}
241-
}
242-
243145
@Override
244146
public void close() {
245147
IOUtils.closeQuietly(paimonCatalog, "paimon catalog");

fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/utils/PaimonConversions.java

Lines changed: 143 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,58 @@
1717

1818
package org.apache.fluss.lake.paimon.utils;
1919

20+
import org.apache.fluss.annotation.VisibleForTesting;
21+
import org.apache.fluss.exception.InvalidConfigException;
22+
import org.apache.fluss.exception.InvalidTableException;
23+
import org.apache.fluss.lake.paimon.FlussDataTypeToPaimonDataType;
2024
import org.apache.fluss.lake.paimon.source.FlussRowAsPaimonRow;
2125
import org.apache.fluss.metadata.TableChange;
26+
import org.apache.fluss.metadata.TableDescriptor;
2227
import org.apache.fluss.metadata.TablePath;
2328
import org.apache.fluss.record.ChangeType;
2429
import org.apache.fluss.row.GenericRow;
2530
import org.apache.fluss.row.InternalRow;
2631

32+
import org.apache.paimon.CoreOptions;
2733
import org.apache.paimon.catalog.Identifier;
34+
import org.apache.paimon.options.Options;
35+
import org.apache.paimon.schema.Schema;
2836
import org.apache.paimon.schema.SchemaChange;
2937
import org.apache.paimon.types.DataType;
3038
import org.apache.paimon.types.RowKind;
3139
import org.apache.paimon.types.RowType;
3240

3341
import java.util.ArrayList;
42+
import java.util.HashSet;
3443
import java.util.List;
35-
import java.util.function.Function;
44+
import java.util.Map;
45+
import java.util.Set;
46+
47+
import static org.apache.fluss.lake.paimon.PaimonLakeCatalog.SYSTEM_COLUMNS;
3648

3749
/** Utils for conversion between Paimon and Fluss. */
3850
public class PaimonConversions {
3951

52+
// for fluss config
53+
private static final String FLUSS_CONF_PREFIX = "fluss.";
54+
// for paimon config
55+
private static final String PAIMON_CONF_PREFIX = "paimon.";
56+
57+
/** Paimon config options set by Fluss should not be set by users. */
58+
@VisibleForTesting public static final Set<String> PAIMON_UNSETTABLE_OPTIONS = new HashSet<>();
59+
60+
@VisibleForTesting public static final Options PAIMON_DEFAULT_OPTIONS = new Options();
61+
62+
static {
63+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.BUCKET.key());
64+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.BUCKET_KEY.key());
65+
PAIMON_UNSETTABLE_OPTIONS.add(CoreOptions.CHANGELOG_PRODUCER.key());
66+
67+
// set partition.legacy-name to false; otherwise Paimon uses toString for all types,
68+
// which causes inconsistent partition values for the same binary value
69+
PAIMON_DEFAULT_OPTIONS.set(CoreOptions.PARTITION_GENERATE_LEGCY_NAME, false);
70+
}
71+
4072
public static RowKind toRowKind(ChangeType changeType) {
4173
switch (changeType) {
4274
case APPEND_ONLY:
@@ -80,22 +112,21 @@ public static Object toPaimonLiteral(DataType dataType, Object flussLiteral) {
80112
.getFieldOrNull(flussRowAsPaimonRow);
81113
}
82114

83-
public static List<SchemaChange> toPaimonSchemaChanges(
84-
List<TableChange> tableChanges, Function<String, String> optionKeyTransformer) {
115+
public static List<SchemaChange> toPaimonSchemaChanges(List<TableChange> tableChanges) {
85116
List<SchemaChange> schemaChanges = new ArrayList<>(tableChanges.size());
86117

87118
for (TableChange tableChange : tableChanges) {
88119
if (tableChange instanceof TableChange.SetOption) {
89120
TableChange.SetOption setOption = (TableChange.SetOption) tableChange;
90121
schemaChanges.add(
91122
SchemaChange.setOption(
92-
optionKeyTransformer.apply(setOption.getKey()),
123+
getFlussPropertyKeyToPaimon(setOption.getKey()),
93124
setOption.getValue()));
94125
} else if (tableChange instanceof TableChange.ResetOption) {
95126
TableChange.ResetOption resetOption = (TableChange.ResetOption) tableChange;
96127
schemaChanges.add(
97128
SchemaChange.removeOption(
98-
optionKeyTransformer.apply(resetOption.getKey())));
129+
getFlussPropertyKeyToPaimon(resetOption.getKey())));
99130
} else {
100131
throw new UnsupportedOperationException(
101132
"Unsupported table change: " + tableChange.getClass());
@@ -104,4 +135,111 @@ public static List<SchemaChange> toPaimonSchemaChanges(
104135

105136
return schemaChanges;
106137
}
138+
139+
public static Schema toPaimonSchema(TableDescriptor tableDescriptor) {
140+
// validate paimon options first
141+
validatePaimonOptions(tableDescriptor.getProperties());
142+
validatePaimonOptions(tableDescriptor.getCustomProperties());
143+
144+
Schema.Builder schemaBuilder = Schema.newBuilder();
145+
Options options = new Options();
146+
147+
// set default properties
148+
setPaimonDefaultProperties(options);
149+
150+
// When bucket key is undefined, it should use dynamic bucket (bucket = -1) mode.
151+
List<String> bucketKeys = tableDescriptor.getBucketKeys();
152+
if (!bucketKeys.isEmpty()) {
153+
int numBuckets =
154+
tableDescriptor
155+
.getTableDistribution()
156+
.flatMap(TableDescriptor.TableDistribution::getBucketCount)
157+
.orElseThrow(
158+
() ->
159+
new IllegalArgumentException(
160+
"Bucket count should be set."));
161+
options.set(CoreOptions.BUCKET, numBuckets);
162+
options.set(CoreOptions.BUCKET_KEY, String.join(",", bucketKeys));
163+
} else {
164+
options.set(CoreOptions.BUCKET, CoreOptions.BUCKET.defaultValue());
165+
}
166+
167+
// set schema
168+
for (org.apache.fluss.metadata.Schema.Column column :
169+
tableDescriptor.getSchema().getColumns()) {
170+
String columnName = column.getName();
171+
if (SYSTEM_COLUMNS.containsKey(columnName)) {
172+
throw new InvalidTableException(
173+
"Column "
174+
+ columnName
175+
+ " conflicts with a system column name of paimon table, please rename the column.");
176+
}
177+
schemaBuilder.column(
178+
columnName,
179+
column.getDataType().accept(FlussDataTypeToPaimonDataType.INSTANCE),
180+
column.getComment().orElse(null));
181+
}
182+
183+
// add system metadata columns to schema
184+
for (Map.Entry<String, DataType> systemColumn : SYSTEM_COLUMNS.entrySet()) {
185+
schemaBuilder.column(systemColumn.getKey(), systemColumn.getValue());
186+
}
187+
188+
// set pk
189+
if (tableDescriptor.hasPrimaryKey()) {
190+
schemaBuilder.primaryKey(
191+
tableDescriptor.getSchema().getPrimaryKey().get().getColumnNames());
192+
options.set(
193+
CoreOptions.CHANGELOG_PRODUCER.key(),
194+
CoreOptions.ChangelogProducer.INPUT.toString());
195+
}
196+
// set partition keys
197+
schemaBuilder.partitionKeys(tableDescriptor.getPartitionKeys());
198+
199+
// set properties to paimon schema
200+
tableDescriptor.getProperties().forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
201+
tableDescriptor
202+
.getCustomProperties()
203+
.forEach((k, v) -> setFlussPropertyToPaimon(k, v, options));
204+
schemaBuilder.options(options.toMap());
205+
return schemaBuilder.build();
206+
}
207+
208+
private static void validatePaimonOptions(Map<String, String> properties) {
209+
properties.forEach(
210+
(k, v) -> {
211+
String paimonKey = k;
212+
if (k.startsWith(PAIMON_CONF_PREFIX)) {
213+
paimonKey = k.substring(PAIMON_CONF_PREFIX.length());
214+
}
215+
if (PAIMON_UNSETTABLE_OPTIONS.contains(paimonKey)
216+
|| PAIMON_DEFAULT_OPTIONS.toMap().containsKey(paimonKey)) {
217+
throw new InvalidConfigException(
218+
String.format(
219+
"The Paimon option %s will be set automatically by Fluss "
220+
+ "and should not set manually.",
221+
k));
222+
}
223+
});
224+
}
225+
226+
private static void setPaimonDefaultProperties(Options options) {
227+
PAIMON_DEFAULT_OPTIONS.toMap().forEach(options::set);
228+
}
229+
230+
private static void setFlussPropertyToPaimon(String key, String value, Options options) {
231+
if (key.startsWith(PAIMON_CONF_PREFIX)) {
232+
options.set(key.substring(PAIMON_CONF_PREFIX.length()), value);
233+
} else {
234+
options.set(FLUSS_CONF_PREFIX + key, value);
235+
}
236+
}
237+
238+
private static String getFlussPropertyKeyToPaimon(String key) {
239+
if (key.startsWith(PAIMON_CONF_PREFIX)) {
240+
return key.substring(PAIMON_CONF_PREFIX.length());
241+
} else {
242+
return FLUSS_CONF_PREFIX + key;
243+
}
244+
}
107245
}

0 commit comments

Comments
 (0)