|
26 | 26 | import org.apache.paimon.format.FileFormat; |
27 | 27 | import org.apache.paimon.mergetree.compact.aggregate.FieldAggregator; |
28 | 28 | import org.apache.paimon.mergetree.compact.aggregate.factory.FieldAggregatorFactory; |
29 | | -import org.apache.paimon.mergetree.compact.aggregate.factory.FieldLastNonNullValueAggFactory; |
30 | 29 | import org.apache.paimon.options.ConfigOption; |
31 | 30 | import org.apache.paimon.options.Options; |
32 | 31 | import org.apache.paimon.table.BucketMode; |
|
44 | 43 | import org.apache.paimon.utils.Preconditions; |
45 | 44 | import org.apache.paimon.utils.StringUtils; |
46 | 45 |
|
47 | | -import java.util.ArrayList; |
48 | 46 | import java.util.Arrays; |
49 | | -import java.util.Collection; |
50 | 47 | import java.util.Collections; |
51 | 48 | import java.util.HashMap; |
52 | | -import java.util.HashSet; |
53 | 49 | import java.util.List; |
54 | 50 | import java.util.Map; |
55 | 51 | import java.util.Optional; |
56 | | -import java.util.Set; |
57 | 52 | import java.util.stream.Collectors; |
58 | 53 |
|
59 | 54 | import static org.apache.paimon.CoreOptions.BUCKET_KEY; |
|
77 | 72 | import static org.apache.paimon.CoreOptions.SNAPSHOT_NUM_RETAINED_MAX; |
78 | 73 | import static org.apache.paimon.CoreOptions.SNAPSHOT_NUM_RETAINED_MIN; |
79 | 74 | import static org.apache.paimon.CoreOptions.STREAMING_READ_OVERWRITE; |
80 | | -import static org.apache.paimon.mergetree.compact.PartialUpdateMergeFunction.SEQUENCE_GROUP; |
| 75 | +import static org.apache.paimon.table.PrimaryKeyTableUtils.createMergeFunctionFactory; |
81 | 76 | import static org.apache.paimon.table.SpecialFields.KEY_FIELD_PREFIX; |
82 | 77 | import static org.apache.paimon.table.SpecialFields.SYSTEM_FIELD_NAMES; |
83 | 78 | import static org.apache.paimon.types.DataTypeRoot.ARRAY; |
@@ -122,7 +117,7 @@ public static void validateTableSchema(TableSchema schema) { |
122 | 117 |
|
123 | 118 | validateSequenceField(schema, options); |
124 | 119 |
|
125 | | - validateSequenceGroup(schema, options); |
| 120 | + validateMergeFunction(schema); |
126 | 121 |
|
127 | 122 | ChangelogProducer changelogProducer = options.changelogProducer(); |
128 | 123 | if (schema.primaryKeys().isEmpty() && changelogProducer != ChangelogProducer.NONE) { |
@@ -449,90 +444,12 @@ private static void validateFieldsPrefix(TableSchema schema, CoreOptions options |
449 | 444 | }); |
450 | 445 | } |
451 | 446 |
|
452 | | - private static void validateSequenceGroup(TableSchema schema, CoreOptions options) { |
453 | | - Map<String, Set<String>> fields2Group = new HashMap<>(); |
454 | | - Set<Integer> sequenceGroupFieldIndexs = new HashSet<>(); |
455 | | - List<String> fieldNames = schema.fieldNames(); |
456 | | - for (Map.Entry<String, String> entry : options.toMap().entrySet()) { |
457 | | - String k = entry.getKey(); |
458 | | - String v = entry.getValue(); |
459 | | - if (k.startsWith(FIELDS_PREFIX) && k.endsWith(SEQUENCE_GROUP)) { |
460 | | - Arrays.stream(v.split(FIELDS_SEPARATOR)) |
461 | | - .map(fieldName -> requireField(fieldName, fieldNames)) |
462 | | - .forEach(sequenceGroupFieldIndexs::add); |
463 | | - String[] sequenceFieldNames = |
464 | | - k.substring( |
465 | | - FIELDS_PREFIX.length() + 1, |
466 | | - k.length() - SEQUENCE_GROUP.length() - 1) |
467 | | - .split(FIELDS_SEPARATOR); |
468 | | - |
469 | | - for (String field : v.split(FIELDS_SEPARATOR)) { |
470 | | - if (!fieldNames.contains(field)) { |
471 | | - throw new IllegalArgumentException( |
472 | | - String.format("Field %s can not be found in table schema.", field)); |
473 | | - } |
474 | | - |
475 | | - List<String> sequenceFieldsList = new ArrayList<>(); |
476 | | - for (String sequenceFieldName : sequenceFieldNames) { |
477 | | - if (!fieldNames.contains(sequenceFieldName)) { |
478 | | - throw new IllegalArgumentException( |
479 | | - String.format( |
480 | | - "The sequence field group: %s can not be found in table schema.", |
481 | | - sequenceFieldName)); |
482 | | - } |
483 | | - sequenceFieldsList.add(sequenceFieldName); |
484 | | - } |
485 | | - |
486 | | - if (fields2Group.containsKey(field)) { |
487 | | - List<List<String>> sequenceGroups = new ArrayList<>(); |
488 | | - sequenceGroups.add(new ArrayList<>(fields2Group.get(field))); |
489 | | - sequenceGroups.add(sequenceFieldsList); |
490 | | - |
491 | | - throw new IllegalArgumentException( |
492 | | - String.format( |
493 | | - "Field %s is defined repeatedly by multiple groups: %s.", |
494 | | - field, sequenceGroups)); |
495 | | - } |
496 | | - |
497 | | - Set<String> group = fields2Group.computeIfAbsent(field, p -> new HashSet<>()); |
498 | | - group.addAll(sequenceFieldsList); |
499 | | - } |
500 | | - |
501 | | - // add self |
502 | | - Arrays.stream(sequenceFieldNames) |
503 | | - .mapToInt(fieldName -> requireField(fieldName, fieldNames)) |
504 | | - .forEach(sequenceGroupFieldIndexs::add); |
505 | | - } |
506 | | - } |
507 | | - |
508 | | - if (options.mergeEngine() == MergeEngine.PARTIAL_UPDATE) { |
509 | | - for (String fieldName : fieldNames) { |
510 | | - String aggFunc = options.fieldAggFunc(fieldName); |
511 | | - String aggFuncName = aggFunc == null ? options.fieldsDefaultFunc() : aggFunc; |
512 | | - if (schema.primaryKeys().contains(fieldName)) { |
513 | | - continue; |
514 | | - } |
515 | | - if (aggFuncName != null) { |
516 | | - // last_non_null_value doesn't require sequence group |
517 | | - checkArgument( |
518 | | - aggFuncName.equals(FieldLastNonNullValueAggFactory.NAME) |
519 | | - || sequenceGroupFieldIndexs.contains( |
520 | | - fieldNames.indexOf(fieldName)), |
521 | | - "Must use sequence group for aggregation functions but not found for field %s.", |
522 | | - fieldName); |
523 | | - } |
524 | | - } |
| 447 | + private static void validateMergeFunction(TableSchema schema) { |
| 448 | + if (schema.primaryKeys().isEmpty()) { |
| 449 | + return; |
525 | 450 | } |
526 | 451 |
|
527 | | - Set<String> illegalGroup = |
528 | | - fields2Group.values().stream() |
529 | | - .flatMap(Collection::stream) |
530 | | - .filter(g -> options.fieldAggFunc(g) != null) |
531 | | - .collect(Collectors.toSet()); |
532 | | - if (!illegalGroup.isEmpty()) { |
533 | | - throw new IllegalArgumentException( |
534 | | - "Should not defined aggregation function on sequence group: " + illegalGroup); |
535 | | - } |
| 452 | + createMergeFunctionFactory(schema); |
536 | 453 | } |
537 | 454 |
|
538 | 455 | private static void validateForDeletionVectors(CoreOptions options) { |
|
0 commit comments