Commit 58ee7dc

Improve lake-first schema evolution safety
1 parent 31fdaa2 commit 58ee7dc

5 files changed: +83 −11 lines changed


fluss-lake/fluss-lake-paimon/src/main/java/org/apache/fluss/lake/paimon/tiering/FlussRecordAsPaimonRow.java

Lines changed: 12 additions & 1 deletion
@@ -51,7 +51,18 @@ public FlussRecordAsPaimonRow(int bucket, RowType tableTowType) {
     public void setFlussRecord(LogRecord logRecord) {
         this.logRecord = logRecord;
         this.internalRow = logRecord.getRow();
-        this.originRowFieldCount = Math.min(internalRow.getFieldCount(), businessFieldCount);
+        int flussFieldCount = internalRow.getFieldCount();
+        if (flussFieldCount > businessFieldCount) {
+            // Fluss record is wider than Paimon schema, which means Lake schema is not yet
+            // synchronized. With "Lake First" strategy, this should not happen in normal cases.
+            throw new IllegalStateException(
+                    String.format(
+                            "Fluss record has %d fields but Paimon schema only has %d business fields. "
+                                    + "This indicates the lake schema is not yet synchronized. "
+                                    + "Please retry the schema change operation.",
+                            flussFieldCount, businessFieldCount));
+        }
+        this.originRowFieldCount = flussFieldCount;
     }

     @Override
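For context, here is a minimal sketch of when the new guard fires. It assumes, as the tests in this commit suggest, that businessFieldCount is the Paimon row type's field count minus the three tiering system columns __bucket, __offset and __timestamp; the class and values below are illustrative only and not part of the commit.

import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;

class WidthCheckSketch {
    public static void main(String[] args) {
        // Paimon table: one business column plus the three tiering system columns.
        RowType paimonRowType =
                RowType.of(
                        DataTypes.BOOLEAN(), // c1 (business column)
                        DataTypes.INT(),     // __bucket
                        DataTypes.BIGINT(),  // __offset
                        DataTypes.BIGINT()); // __timestamp
        int businessFieldCount = paimonRowType.getFieldCount() - 3; // assumed relation: = 1

        // A Fluss record that already carries a newly added column has 2 fields.
        int flussFieldCount = 2;
        if (flussFieldCount > businessFieldCount) {
            // This is the case setFlussRecord now rejects instead of silently truncating:
            // under "Lake First", Paimon should already know about the new column.
            System.out.println("setFlussRecord would throw IllegalStateException");
        }
    }
}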

fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/tiering/FlussRecordAsPaimonRowTest.java

Lines changed: 7 additions & 6 deletions
@@ -38,6 +38,7 @@
 import static org.apache.fluss.record.ChangeType.UPDATE_AFTER;
 import static org.apache.fluss.record.ChangeType.UPDATE_BEFORE;
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;

 /** Test for {@link FlussRecordAsPaimonRow}. */
 class FlussRecordAsPaimonRowTest {
@@ -197,6 +198,7 @@ void testPaimonSchemaWiderThanFlussRecord() {

     @Test
     void testFlussRecordWiderThanPaimonSchema() {
+        // With "Lake First" strategy, Fluss record wider than Paimon schema should throw exception
         int tableBucket = 0;
         RowType tableRowType =
                 RowType.of(
@@ -215,12 +217,11 @@ void testFlussRecordWiderThanPaimonSchema() {
         genericRow.setField(0, true);
         genericRow.setField(1, BinaryString.fromString("extra"));
         LogRecord logRecord = new GenericRecord(logOffset, timeStamp, APPEND_ONLY, genericRow);
-        flussRecordAsPaimonRow.setFlussRecord(logRecord);

-        assertThat(flussRecordAsPaimonRow.getFieldCount()).isEqualTo(4);
-        assertThat(flussRecordAsPaimonRow.getBoolean(0)).isTrue();
-        assertThat(flussRecordAsPaimonRow.getInt(1)).isEqualTo(tableBucket);
-        assertThat(flussRecordAsPaimonRow.getLong(2)).isEqualTo(logOffset);
-        assertThat(flussRecordAsPaimonRow.getLong(3)).isEqualTo(timeStamp);
+        // Should throw exception instead of silently truncating data
+        assertThatThrownBy(() -> flussRecordAsPaimonRow.setFlussRecord(logRecord))
+                .isInstanceOf(IllegalStateException.class)
+                .hasMessageContaining(
+                        "Fluss record has 2 fields but Paimon schema only has 1 business fields");
     }
 }

fluss-lake/fluss-lake-paimon/src/test/java/org/apache/fluss/lake/paimon/tiering/PaimonTieringITCase.java

Lines changed: 57 additions & 0 deletions
@@ -604,6 +604,63 @@ private CloseableIterator<org.apache.paimon.data.InternalRow> getPaimonRowClosea
         return reader.toCloseableIterator();
     }

+    @Test
+    void testTieringWithAddColumn() throws Exception {
+        // Test ADD COLUMN during tiering with "Lake First" strategy
+
+        // 1. Create a datalake enabled table with initial schema (c1: INT, c2: STRING)
+        TablePath tablePath = TablePath.of(DEFAULT_DB, "addColumnTable");
+        long tableId = createLogTable(tablePath);
+        TableBucket tableBucket = new TableBucket(tableId, 0);
+
+        // 2. Write initial data before ADD COLUMN
+        List<InternalRow> initialRows = Arrays.asList(row(1, "v1"), row(2, "v2"), row(3, "v3"));
+        writeRows(tablePath, initialRows, true);
+
+        // 3. Start tiering job
+        JobClient jobClient = buildTieringJob(execEnv);
+
+        try {
+            // 4. Wait for initial data to be tiered
+            assertReplicaStatus(tableBucket, 3);
+
+            // 5. Execute ADD COLUMN (c3: INT, nullable)
+            List<TableChange> addColumnChanges =
+                    Collections.singletonList(
+                            TableChange.addColumn(
+                                    "c3",
+                                    DataTypes.INT(),
+                                    "new column",
+                                    TableChange.ColumnPosition.last()));
+            admin.alterTable(tablePath, addColumnChanges, false).get();
+
+            // 6. Write more data after ADD COLUMN (with new column value)
+            List<InternalRow> newRows = Arrays.asList(row(4, "v4"), row(5, "v5"), row(6, "v6"));
+            writeRows(tablePath, newRows, true);
+
+            // 7. Wait for new data to be tiered
+            assertReplicaStatus(tableBucket, 6);
+
+            // 8. Verify Paimon table has the new column
+            Identifier tableIdentifier =
+                    Identifier.create(tablePath.getDatabaseName(), tablePath.getTableName());
+            FileStoreTable paimonTable = (FileStoreTable) paimonCatalog.getTable(tableIdentifier);
+            List<String> fieldNames = paimonTable.rowType().getFieldNames();
+
+            // Should have: c1, c2, c3, __bucket, __offset, __timestamp
+            assertThat(fieldNames).contains("c1", "c2", "c3");
+
+            // 9. Verify all data is present in Paimon (no data loss)
+            List<InternalRow> allRows = new ArrayList<>();
+            allRows.addAll(initialRows);
+            allRows.addAll(newRows);
+            checkDataInPaimonAppendOnlyTable(tablePath, allRows, 0);
+
+        } finally {
+            jobClient.cancel().get();
+        }
+    }
+
     @Override
     protected FlussClusterExtension getFlussClusterExtension() {
         return FLUSS_CLUSTER_EXTENSION;

fluss-server/src/main/java/org/apache/fluss/server/coordinator/MetadataManager.java

Lines changed: 6 additions & 4 deletions
@@ -335,13 +335,15 @@ public void alterTableSchema(
             // validate the table column changes
             if (!schemaChanges.isEmpty()) {
                 Schema newSchema = SchemaUpdate.applySchemaChanges(table, schemaChanges);
-                // update the schema in Fluss (ZK) first - Fluss is the source of truth
-                if (!newSchema.equals(table.getSchema())) {
-                    zookeeperClient.registerSchema(tablePath, newSchema, table.getSchemaId() + 1);
-                }

+                // Lake First: sync to Lake before updating Fluss schema
                 syncSchemaChangesToLake(
                         tablePath, table, schemaChanges, lakeCatalog, lakeCatalogContext);
+
+                // Update Fluss schema (ZK) after Lake sync succeeds
+                if (!newSchema.equals(table.getSchema())) {
+                    zookeeperClient.registerSchema(tablePath, newSchema, table.getSchemaId() + 1);
+                }
             }
         } catch (Exception e) {
             if (e instanceof TableNotExistException) {
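The reordering above is the core of the change: the lake is updated before ZooKeeper. Below is a self-contained sketch of the failure argument, using hypothetical LakeCatalog and FlussMetastore stand-ins rather than the real Fluss APIs.

import java.util.List;

// Hypothetical stand-ins for illustration only; not the real Fluss interfaces.
interface LakeCatalog { void applyChanges(String table, List<String> changes) throws Exception; }
interface FlussMetastore { void registerSchema(String table, int schemaId) throws Exception; }

class LakeFirstSchemaChange {
    private final LakeCatalog lake;
    private final FlussMetastore fluss;

    LakeFirstSchemaChange(LakeCatalog lake, FlussMetastore fluss) {
        this.lake = lake;
        this.fluss = fluss;
    }

    void alter(String table, List<String> changes, int nextSchemaId) throws Exception {
        // 1. Sync the change to the lake first. If this fails, Fluss (ZK) is untouched,
        //    so no record wider than the lake schema is ever produced; the caller retries.
        lake.applyChanges(table, changes);

        // 2. Only then advance the Fluss schema. If this step fails after the lake sync
        //    succeeded, a retry re-applies the same ADD COLUMN to the lake, which the
        //    idempotent check in SchemaUpdate turns into a no-op.
        fluss.registerSchema(table, nextSchemaId);
    }
}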

fluss-server/src/main/java/org/apache/fluss/server/coordinator/SchemaUpdate.java

Lines changed: 1 addition & 0 deletions
@@ -86,6 +86,7 @@ public SchemaUpdate applySchemaChange(TableChange columnChange) {
     private SchemaUpdate addColumn(TableChange.AddColumn addColumn) {
         Schema.Column existingColumn = existedColumns.get(addColumn.getName());
         if (existingColumn != null) {
+            // Allow idempotent retries: if column name/type/comment match existing, treat as no-op
             if (!existingColumn.getDataType().equals(addColumn.getDataType())
                     || !Objects.equals(existingColumn.getComment(), addColumn.getComment())) {
                 throw new IllegalArgumentException(
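A simplified, standalone sketch of the idempotent ADD COLUMN check introduced above; ColumnSketch and the map parameter are stand-ins for Schema.Column and existedColumns, not the real Fluss types.

import java.util.Map;
import java.util.Objects;

final class ColumnSketch {
    final String name;
    final String type;
    final String comment;

    ColumnSketch(String name, String type, String comment) {
        this.name = name;
        this.type = type;
        this.comment = comment;
    }
}

class IdempotentAddColumn {
    /** Returns true if the column must be added, false if an identical column already exists. */
    static boolean shouldAdd(Map<String, ColumnSketch> existing, ColumnSketch toAdd) {
        ColumnSketch current = existing.get(toAdd.name);
        if (current == null) {
            return true; // genuinely new column
        }
        if (!current.type.equals(toAdd.type) || !Objects.equals(current.comment, toAdd.comment)) {
            // Same name but a different definition: this is a real conflict, not a retry.
            throw new IllegalArgumentException(
                    "Column " + toAdd.name + " already exists with a different definition");
        }
        return false; // identical definition: a retried ADD COLUMN is a no-op
    }
}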
