Skip to content

Commit 2dd5a37

Browse files
authored
[Kernel] Add ability to store type changes on StructField (#4519)
<!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md 2. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP] Your PR title ...'. 3. Be sure to keep the PR description updated to reflect all changes. 4. Please write your PR title to summarize what this PR proposes. 5. If possible, provide a concise example to reproduce the issue for a faster review. 6. If applicable, include the corresponding issue number in the PR title and link it in the body. --> #### Which Delta project/connector is this regarding? - [ ] Spark - [ ] Standalone - [ ] Flink - [x] Kernel - [ ] Other (fill in here) ## Description This change adds a new `TypeChange` class and the ability to store a list of type changes on `StructField`. This will allow for persisting/updating type changes (and is modelled similarly to how collation works due to the complexity of working with maps/arrays). ## How was this patch tested? Added unit tests. ## Does this PR introduce _any_ user-facing changes? This is an API change only.
1 parent f23ca26 commit 2dd5a37

File tree

4 files changed

+295
-9
lines changed

4 files changed

+295
-9
lines changed

kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (2023) The Delta Lake Project Authors.
2+
* Copyright (2025) The Delta Lake Project Authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,8 +17,10 @@
1717
package io.delta.kernel.types;
1818

1919
import io.delta.kernel.annotation.Evolving;
20+
import io.delta.kernel.exceptions.KernelException;
2021
import io.delta.kernel.internal.util.Tuple2;
2122
import java.util.ArrayList;
23+
import java.util.Collections;
2224
import java.util.List;
2325
import java.util.Objects;
2426

@@ -48,7 +50,8 @@ public class StructField {
4850
METADATA_ROW_INDEX_COLUMN_NAME,
4951
LongType.LONG,
5052
false,
51-
FieldMetadata.builder().putBoolean(IS_METADATA_COLUMN_KEY, true).build());
53+
FieldMetadata.builder().putBoolean(IS_METADATA_COLUMN_KEY, true).build(),
54+
Collections.emptyList());
5255

5356
public static final String COLLATIONS_METADATA_KEY = "__COLLATIONS";
5457

@@ -60,19 +63,36 @@ public class StructField {
6063
private final DataType dataType;
6164
private final boolean nullable;
6265
private final FieldMetadata metadata;
66+
private final List<TypeChange> typeChanges;
6367

6468
public StructField(String name, DataType dataType, boolean nullable) {
6569
this(name, dataType, nullable, FieldMetadata.empty());
6670
}
6771

6872
public StructField(String name, DataType dataType, boolean nullable, FieldMetadata metadata) {
73+
this(name, dataType, nullable, metadata, Collections.emptyList());
74+
}
75+
76+
public StructField(
77+
String name,
78+
DataType dataType,
79+
boolean nullable,
80+
FieldMetadata metadata,
81+
List<TypeChange> typeChanges) {
6982
this.name = name;
7083
this.dataType = dataType;
7184
this.nullable = nullable;
85+
this.typeChanges = typeChanges == null ? Collections.emptyList() : typeChanges;
7286

7387
FieldMetadata collationMetadata = fetchCollationMetadata();
7488
this.metadata =
7589
new FieldMetadata.Builder().fromMetadata(metadata).fromMetadata(collationMetadata).build();
90+
if (!this.typeChanges.isEmpty()
91+
&& (dataType instanceof MapType
92+
|| dataType instanceof StructType
93+
|| dataType instanceof ArrayType)) {
94+
throw new KernelException("Type changes are not supported on nested types.");
95+
}
7696
}
7797

7898
/** @return the name of this field */
@@ -95,6 +115,15 @@ public boolean isNullable() {
95115
return nullable;
96116
}
97117

118+
/**
119+
* Returns the list of type changes for this field. A field can go through multiple type changes
120+
* (e.g. {@code int->long->decimal}). Changes are ordered from least recent to most recent in the
121+
* list (index 0 is the oldest change).
122+
*/
123+
public List<TypeChange> getTypeChanges() {
124+
return Collections.unmodifiableList(typeChanges);
125+
}
126+
98127
public boolean isMetadataColumn() {
99128
return metadata.contains(IS_METADATA_COLUMN_KEY)
100129
&& (boolean) metadata.get(IS_METADATA_COLUMN_KEY);
@@ -107,7 +136,8 @@ public boolean isDataColumn() {
107136
@Override
108137
public String toString() {
109138
return String.format(
110-
"StructField(name=%s,type=%s,nullable=%s,metadata=%s)", name, dataType, nullable, metadata);
139+
"StructField(name=%s,type=%s,nullable=%s,metadata=%s,typeChanges=%s)",
140+
name, dataType, nullable, metadata, typeChanges);
111141
}
112142

113143
@Override
@@ -122,16 +152,27 @@ public boolean equals(Object o) {
122152
return nullable == that.nullable
123153
&& name.equals(that.name)
124154
&& dataType.equals(that.dataType)
125-
&& metadata.equals(that.metadata);
155+
&& metadata.equals(that.metadata)
156+
&& Objects.equals(typeChanges, that.typeChanges);
126157
}
127158

128159
@Override
129160
public int hashCode() {
130-
return Objects.hash(name, dataType, nullable, metadata);
161+
return Objects.hash(name, dataType, nullable, metadata, typeChanges);
131162
}
132163

133164
public StructField withNewMetadata(FieldMetadata metadata) {
134-
return new StructField(name, dataType, nullable, metadata);
165+
return new StructField(name, dataType, nullable, metadata, typeChanges);
166+
}
167+
168+
/**
169+
* Creates a copy of this StructField with the specified type changes.
170+
*
171+
* @param typeChanges The list of type changes to set
172+
* @return A new StructField with the same properties but with the specified type changes
173+
*/
174+
public StructField withTypeChanges(List<TypeChange> typeChanges) {
175+
return new StructField(name, dataType, nullable, metadata, typeChanges);
135176
}
136177

137178
private List<Tuple2<String, String>> getNestedCollatedFields(DataType parent, String path) {
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Copyright (2025) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.delta.kernel.types;
17+
18+
import java.util.Objects;
19+
20+
/**
21+
* Represents a type change for a field, containing the original and new primitive types.
22+
*
23+
* <p>Type changes are actually persisted in metadata attached to StructFields but the rules for
24+
* where the metadata is attached depend on if the change is for nested arrays/maps or primitive
25+
* types.
26+
*/
27+
public class TypeChange {
28+
private final BasePrimitiveType from;
29+
private final BasePrimitiveType to;
30+
31+
public TypeChange(BasePrimitiveType from, BasePrimitiveType to) {
32+
this.from = Objects.requireNonNull(from, "from type cannot be null");
33+
this.to = Objects.requireNonNull(to, "to type cannot be null");
34+
}
35+
36+
public BasePrimitiveType getFrom() {
37+
return from;
38+
}
39+
40+
public BasePrimitiveType getTo() {
41+
return to;
42+
}
43+
44+
@Override
45+
public boolean equals(Object o) {
46+
if (this == o) {
47+
return true;
48+
}
49+
if (o == null || getClass() != o.getClass()) {
50+
return false;
51+
}
52+
TypeChange that = (TypeChange) o;
53+
return Objects.equals(from, that.from) && Objects.equals(to, that.to);
54+
}
55+
56+
@Override
57+
public int hashCode() {
58+
return Objects.hash(from, to);
59+
}
60+
61+
@Override
62+
public String toString() {
63+
return String.format("TypeChange(from=%s,to=%s)", from, to);
64+
}
65+
}

kernel/kernel-api/src/test/scala/io/delta/kernel/internal/metrics/MetricsReportSerializerSuite.scala

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,18 +259,22 @@ class MetricsReportSerializerSuite extends AnyFunSuite {
259259
Optional.of(exception))
260260

261261
// Manually check expected JSON
262+
val tableSchemaStr = "struct(StructField(name=part,type=integer,nullable=true,metadata={}," +
263+
"typeChanges=[]), StructField(name=id,type=integer,nullable=true,metadata={},typeChanges=[]))"
264+
val readSchemaStr = "struct(StructField(name=id,type=integer,nullable=true,metadata={}," +
265+
"typeChanges=[]))"
266+
262267
val expectedJson =
263268
s"""
264269
|{"tablePath":"/table/path",
265270
|"operationType":"Scan",
266271
|"reportUUID":"${scanReport1.getReportUUID}",
267272
|"exception":"$exception",
268273
|"tableVersion":1,
269-
|"tableSchema":"struct(StructField(name=part,type=integer,nullable=true,metadata={}),
270-
| StructField(name=id,type=integer,nullable=true,metadata={}))",
274+
|"tableSchema":"$tableSchemaStr",
271275
|"snapshotReportUUID":"$snapshotReportUUID",
272276
|"filter":"(column(`part`) > 1)",
273-
|"readSchema":"struct(StructField(name=id,type=integer,nullable=true,metadata={}))",
277+
|"readSchema":"$readSchemaStr",
274278
|"partitionPredicate":"(column(`part`) > 1)",
275279
|"dataSkippingFilter":null,
276280
|"isFullyConsumed":true,
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Copyright (2023) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package io.delta.kernel.types
18+
19+
import java.util.ArrayList
20+
21+
import io.delta.kernel.exceptions.KernelException
22+
import io.delta.kernel.types.StructField.COLLATIONS_METADATA_KEY
23+
24+
import collection.JavaConverters._
25+
import org.scalatest.funsuite.AnyFunSuite
26+
27+
/**
 * Test suite for the [[StructField]] class and the [[TypeChange]] values it can carry.
 */
class StructFieldSuite extends AnyFunSuite {

  // Fields are equal only when name, type, nullability, metadata and type changes all match.
  test("equality and hashcode") {
    val field1 = new StructField(
      "field",
      LongType.LONG,
      true,
      FieldMetadata.empty(),
      Seq(new TypeChange(IntegerType.INTEGER, LongType.LONG)).asJava)
    val field2 = new StructField(
      "field",
      LongType.LONG,
      true,
      FieldMetadata.empty(),
      Seq(new TypeChange(IntegerType.INTEGER, LongType.LONG)).asJava)
    val field3 = new StructField("differentField", IntegerType.INTEGER, true)
    val field4 = new StructField("field", StringType.STRING, true)
    val field5 = new StructField("field", IntegerType.INTEGER, false)
    val field6 = new StructField(
      "field",
      IntegerType.INTEGER,
      true,
      FieldMetadata.builder().putBoolean("a", true).build(),
      Seq(new TypeChange(IntegerType.INTEGER, LongType.LONG)).asJava)
    val field7 = new StructField(
      "field",
      LongType.LONG,
      true,
      FieldMetadata.empty(),
      Seq(new TypeChange(IntegerType.INTEGER, StringType.STRING)).asJava)

    assert(field1 == field2)
    assert(field1.hashCode() == field2.hashCode())

    assert(field1 != field3)
    assert(field1 != field4)
    assert(field1 != field5)
    assert(field1 != field6)
    assert(field1 != field7)
  }

  // Type changes must be rejected on struct, array and map fields, both when set through
  // withTypeChanges and when passed directly to the constructor.
  Seq(
    new StructType(),
    new ArrayType(LongType.LONG, false),
    new MapType(LongType.LONG, LongType.LONG, false)).foreach { dataType =>
    test(s"withTypeChanges should throw exception with type changes for nested types $dataType") {
      val field = new StructField(
        "field",
        dataType,
        true)
      assertThrows[KernelException] {
        field.withTypeChanges(Seq(new TypeChange(IntegerType.INTEGER, LongType.LONG)).asJava)
      }
    }

    test(s"Constructor should throw exception with type changes for nested types $dataType") {

      assertThrows[KernelException] {
        new StructField(
          "field",
          dataType,
          true,
          FieldMetadata.empty(),
          Seq(new TypeChange(IntegerType.INTEGER, LongType.LONG)).asJava)
      }
    }
  }

  // Test metadata column detection
  test("metadata column detection") {
    val regularField = new StructField("regularField", IntegerType.INTEGER, true)
    assert(!regularField.isMetadataColumn)
    assert(regularField.isDataColumn)

    // Create a metadata field
    val metadataFieldName = "_metadata.custom"
    val metadataBuilder = FieldMetadata.builder()
    metadataBuilder.putBoolean("isMetadataColumn", true)
    val metadataField =
      new StructField(metadataFieldName, LongType.LONG, false, metadataBuilder.build())

    assert(metadataField.isMetadataColumn)
    assert(!metadataField.isDataColumn)
  }

  // withNewMetadata must replace the metadata and keep every other property.
  test("withNewMetadata") {
    val originalField = new StructField("field", IntegerType.INTEGER, true)
    assert(originalField.getMetadata() == FieldMetadata.empty())

    val newMetadataBuilder = FieldMetadata.builder()
    newMetadataBuilder.putString("key", "value")
    val newMetadata = newMetadataBuilder.build()

    val updatedField = originalField.withNewMetadata(newMetadata)

    assert(updatedField.getName == originalField.getName)
    assert(updatedField.getDataType == originalField.getDataType)
    assert(updatedField.isNullable == originalField.isNullable)
    assert(updatedField.getMetadata == newMetadata)
    assert(updatedField.getMetadata.getString("key") == "value")
  }

  // withTypeChanges must set the type changes and keep every other property.
  test("type changes") {
    val originalField = new StructField(
      "field",
      IntegerType.INTEGER,
      true,
      FieldMetadata.builder().putString("a", "b").build())
    assert(originalField.getTypeChanges.isEmpty)

    val typeChanges = new ArrayList[TypeChange]()
    typeChanges.add(new TypeChange(IntegerType.INTEGER, LongType.LONG))

    val updatedField = originalField.withTypeChanges(typeChanges)

    assert(updatedField.getName == originalField.getName)
    assert(updatedField.getDataType == originalField.getDataType)
    assert(updatedField.isNullable == originalField.isNullable)
    assert(updatedField.getMetadata == originalField.getMetadata)
    assert(updatedField.getTypeChanges.size() == 1)

    val typeChange = updatedField.getTypeChanges.get(0)
    assert(typeChange.getFrom == IntegerType.INTEGER)
    assert(typeChange.getTo == LongType.LONG)
  }

  // Test TypeChange class
  test("TypeChange class") {
    val from = IntegerType.INTEGER
    val to = LongType.LONG
    val typeChange = new TypeChange(from, to)

    assert(typeChange.getFrom == from)
    assert(typeChange.getTo == to)

    // Test equals and hashCode
    val sameTypeChange = new TypeChange(IntegerType.INTEGER, LongType.LONG)
    val differentTypeChange = new TypeChange(IntegerType.INTEGER, StringType.STRING)

    assert(typeChange == sameTypeChange)
    assert(typeChange.hashCode() == sameTypeChange.hashCode())
    assert(typeChange != differentTypeChange)
  }
}

0 commit comments

Comments
 (0)