Skip to content

Commit 76fbf37

Browse files
committed
[core][spark] Introduce default value when writing from Spark DDL
1 parent 0be50e2 commit 76fbf37

File tree

18 files changed

+414
-56
lines changed

18 files changed

+414
-56
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
---
2+
title: "Default Value"
3+
weight: 8
4+
type: docs
5+
aliases:
6+
- /spark/default-value.html
7+
---
8+
<!--
9+
Licensed to the Apache Software Foundation (ASF) under one
10+
or more contributor license agreements. See the NOTICE file
11+
distributed with this work for additional information
12+
regarding copyright ownership. The ASF licenses this file
13+
to you under the Apache License, Version 2.0 (the
14+
"License"); you may not use this file except in compliance
15+
with the License. You may obtain a copy of the License at
16+
17+
http://www.apache.org/licenses/LICENSE-2.0
18+
19+
Unless required by applicable law or agreed to in writing,
20+
software distributed under the License is distributed on an
21+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
22+
KIND, either express or implied. See the License for the
23+
specific language governing permissions and limitations
24+
under the License.
25+
-->
26+
27+
# Default Value
28+
29+
Paimon allows specifying default values for columns. When users write to these tables without explicitly providing
30+
values for certain columns, Paimon automatically generates default values for these columns.
31+
32+
## Create Table
33+
34+
You can create a table with columns with default values using the following SQL:
35+
36+
```sql
37+
CREATE TABLE my_table (
38+
a BIGINT,
39+
b STRING DEFAULT 'my_value',
40+
c INT DEFAULT 5
41+
);
42+
```
43+
44+
## Insert Table
45+
46+
For SQL commands that execute table writes, such as the `INSERT`, `UPDATE`, and `MERGE` commands, the `DEFAULT` keyword
47+
or `NULL` value is parsed into the default value specified for the corresponding column.
48+
49+
## Limitation
50+
51+
Currently, default values can only be specified when creating a table. The following usages are not supported:
52+
53+
1. Adding a column with a default value via `ALTER TABLE` is not supported, for example: `ALTER TABLE T ADD COLUMN d INT DEFAULT 5;`.
54+
2. Setting a default value on an existing column via `ALTER TABLE` is not supported, for example: `ALTER TABLE T ALTER COLUMN d SET DEFAULT 5;`.

paimon-api/src/main/java/org/apache/paimon/schema/Schema.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ private static List<DataField> normalizeFields(
175175
field.id(),
176176
field.name(),
177177
field.type().copy(false),
178-
field.description()));
178+
field.description(),
179+
field.defaultValue()));
179180
} else {
180181
newFields.add(field);
181182
}
@@ -302,12 +303,28 @@ public Builder column(String columnName, DataType dataType) {
302303
* @param description description of the column
303304
*/
304305
public Builder column(String columnName, DataType dataType, @Nullable String description) {
306+
return column(columnName, dataType, description, null);
307+
}
308+
309+
/**
310+
* Declares a column that is appended to this schema.
311+
*
312+
* @param columnName column name
313+
* @param dataType data type of the column
314+
* @param description description of the column
315+
* @param defaultValue default value of the column
316+
*/
317+
public Builder column(
318+
String columnName,
319+
DataType dataType,
320+
@Nullable String description,
321+
@Nullable String defaultValue) {
305322
Preconditions.checkNotNull(columnName, "Column name must not be null.");
306323
Preconditions.checkNotNull(dataType, "Data type must not be null.");
307324

308325
int id = highestFieldId.incrementAndGet();
309326
DataType reassignDataType = ReassignFieldId.reassign(dataType, highestFieldId);
310-
columns.add(new DataField(id, columnName, reassignDataType, description));
327+
columns.add(new DataField(id, columnName, reassignDataType, description, defaultValue));
311328
return this;
312329
}
313330

paimon-api/src/main/java/org/apache/paimon/types/DataField.java

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.io.Serializable;
2929
import java.util.Objects;
3030

31+
import static org.apache.paimon.types.DataTypeFamily.CHARACTER_STRING;
3132
import static org.apache.paimon.utils.EncodingUtils.escapeIdentifier;
3233
import static org.apache.paimon.utils.EncodingUtils.escapeSingleQuotes;
3334

@@ -41,27 +42,31 @@ public final class DataField implements Serializable {
4142

4243
private static final long serialVersionUID = 1L;
4344

44-
public static final String FIELD_FORMAT_WITH_DESCRIPTION = "%s %s '%s'";
45-
46-
public static final String FIELD_FORMAT_NO_DESCRIPTION = "%s %s";
47-
4845
private final int id;
49-
5046
private final String name;
51-
5247
private final DataType type;
53-
5448
private final @Nullable String description;
49+
private final @Nullable String defaultValue;
5550

5651
public DataField(int id, String name, DataType dataType) {
57-
this(id, name, dataType, null);
52+
this(id, name, dataType, null, null);
5853
}
5954

60-
public DataField(int id, String name, DataType type, @Nullable String description) {
55+
public DataField(int id, String name, DataType dataType, @Nullable String description) {
56+
this(id, name, dataType, description, null);
57+
}
58+
59+
public DataField(
60+
int id,
61+
String name,
62+
DataType type,
63+
@Nullable String description,
64+
@Nullable String defaultValue) {
6165
this.id = id;
6266
this.name = name;
6367
this.type = type;
6468
this.description = description;
69+
this.defaultValue = defaultValue;
6570
}
6671

6772
public int id() {
@@ -76,49 +81,59 @@ public DataType type() {
7681
return type;
7782
}
7883

79-
public DataField newId(int newid) {
80-
return new DataField(newid, name, type, description);
84+
public DataField newId(int newId) {
85+
return new DataField(newId, name, type, description, defaultValue);
8186
}
8287

8388
public DataField newName(String newName) {
84-
return new DataField(id, newName, type, description);
89+
return new DataField(id, newName, type, description, defaultValue);
8590
}
8691

8792
public DataField newType(DataType newType) {
88-
return new DataField(id, name, newType, description);
93+
return new DataField(id, name, newType, description, defaultValue);
8994
}
9095

9196
public DataField newDescription(String newDescription) {
92-
return new DataField(id, name, type, newDescription);
97+
return new DataField(id, name, type, newDescription, defaultValue); // ctor order is (description, defaultValue): replace the description, keep the default value
98+
}
99+
100+
public DataField newDefaultValue(String newDefaultValue) {
101+
return new DataField(id, name, type, description, newDefaultValue); // ctor order is (description, defaultValue): keep the description, replace the default value
93102
}
94103

95104
@Nullable
96105
public String description() {
97106
return description;
98107
}
99108

109+
@Nullable
110+
public String defaultValue() {
111+
return defaultValue;
112+
}
113+
100114
public DataField copy() {
101-
return new DataField(id, name, type.copy(), description);
115+
return new DataField(id, name, type.copy(), description, defaultValue);
102116
}
103117

104118
public DataField copy(boolean isNullable) {
105-
return new DataField(id, name, type.copy(isNullable), description);
119+
return new DataField(id, name, type.copy(isNullable), description, defaultValue);
106120
}
107121

108122
public String asSQLString() {
109-
return formatString(type.asSQLString());
110-
}
111-
112-
private String formatString(String typeString) {
113-
if (description == null) {
114-
return String.format(FIELD_FORMAT_NO_DESCRIPTION, escapeIdentifier(name), typeString);
115-
} else {
116-
return String.format(
117-
FIELD_FORMAT_WITH_DESCRIPTION,
118-
escapeIdentifier(name),
119-
typeString,
120-
escapeSingleQuotes(description));
123+
StringBuilder sb = new StringBuilder();
124+
sb.append(escapeIdentifier(name)).append(" ").append(type.asSQLString());
125+
if (description != null) {
126+
sb.append(" COMMENT '").append(escapeSingleQuotes(description)).append("'"); // wrap in quotes so the escaped description is a valid SQL string literal
121127
}
128+
if (defaultValue != null) {
129+
sb.append(" DEFAULT ");
130+
if (type.getTypeRoot().getFamilies().contains(CHARACTER_STRING)) {
131+
sb.append(escapeSingleQuotes(defaultValue));
132+
} else {
133+
sb.append(defaultValue);
134+
}
135+
}
136+
return sb.toString();
122137
}
123138

124139
public void serializeJson(JsonGenerator generator) throws IOException {
@@ -130,6 +145,9 @@ public void serializeJson(JsonGenerator generator) throws IOException {
130145
if (description() != null) {
131146
generator.writeStringField("description", description());
132147
}
148+
if (defaultValue() != null) {
149+
generator.writeStringField("defaultValue", defaultValue());
150+
}
133151
generator.writeEndObject();
134152
}
135153

@@ -145,7 +163,8 @@ public boolean equals(Object o) {
145163
return Objects.equals(id, field.id)
146164
&& Objects.equals(name, field.name)
147165
&& Objects.equals(type, field.type)
148-
&& Objects.equals(description, field.description);
166+
&& Objects.equals(description, field.description)
167+
&& Objects.equals(defaultValue, field.defaultValue);
149168
}
150169

151170
public boolean equalsIgnoreFieldId(DataField other) {
@@ -157,7 +176,8 @@ public boolean equalsIgnoreFieldId(DataField other) {
157176
}
158177
return Objects.equals(name, other.name)
159178
&& type.equalsIgnoreFieldId(other.type)
160-
&& Objects.equals(description, other.description);
179+
&& Objects.equals(description, other.description)
180+
&& Objects.equals(defaultValue, other.defaultValue);
161181
}
162182

163183
public boolean isPrunedFrom(DataField other) {
@@ -170,12 +190,13 @@ public boolean isPrunedFrom(DataField other) {
170190
return Objects.equals(id, other.id)
171191
&& Objects.equals(name, other.name)
172192
&& type.isPrunedFrom(other.type)
173-
&& Objects.equals(description, other.description);
193+
&& Objects.equals(description, other.description)
194+
&& Objects.equals(defaultValue, other.defaultValue);
174195
}
175196

176197
@Override
177198
public int hashCode() {
178-
return Objects.hash(id, name, type, description);
199+
return Objects.hash(id, name, type, description, defaultValue);
179200
}
180201

181202
@Override
@@ -193,7 +214,8 @@ public static boolean dataFieldEqualsIgnoreId(DataField dataField1, DataField da
193214
} else if (dataField1 != null && dataField2 != null) {
194215
return Objects.equals(dataField1.name(), dataField2.name())
195216
&& Objects.equals(dataField1.type(), dataField2.type())
196-
&& Objects.equals(dataField1.description(), dataField2.description());
217+
&& Objects.equals(dataField1.description(), dataField2.description())
218+
&& Objects.equals(dataField1.defaultValue(), dataField2.defaultValue());
197219
} else {
198220
return false;
199221
}

paimon-api/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,12 @@ private static DataField parseDataField(JsonNode json, AtomicInteger fieldId) {
5858
if (descriptionNode != null) {
5959
description = descriptionNode.asText();
6060
}
61-
return new DataField(id, name, type, description);
61+
JsonNode defaultValueNode = json.get("defaultValue");
62+
String defaultValue = null;
63+
if (defaultValueNode != null) {
64+
defaultValue = defaultValueNode.asText();
65+
}
66+
return new DataField(id, name, type, description, defaultValue);
6267
}
6368

6469
public static DataType parseDataType(JsonNode json) {

paimon-api/src/main/java/org/apache/paimon/types/RowType.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonProperty;
2828
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
2929

30+
import javax.annotation.Nullable;
31+
3032
import java.io.IOException;
3133
import java.util.ArrayList;
3234
import java.util.Arrays;
@@ -387,7 +389,7 @@ public Builder field(String name, DataType type) {
387389
return this;
388390
}
389391

390-
public Builder field(String name, DataType type, String description) {
392+
public Builder field(String name, DataType type, @Nullable String description) {
391393
fields.add(new DataField(fieldId.incrementAndGet(), name, type, description));
392394
return this;
393395
}

paimon-common/src/main/java/org/apache/paimon/casting/DefaultValueRow.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,20 @@
2020

2121
import org.apache.paimon.data.BinaryString;
2222
import org.apache.paimon.data.Decimal;
23+
import org.apache.paimon.data.GenericRow;
2324
import org.apache.paimon.data.InternalArray;
2425
import org.apache.paimon.data.InternalMap;
2526
import org.apache.paimon.data.InternalRow;
2627
import org.apache.paimon.data.Timestamp;
2728
import org.apache.paimon.data.variant.Variant;
29+
import org.apache.paimon.types.DataField;
2830
import org.apache.paimon.types.RowKind;
31+
import org.apache.paimon.types.RowType;
32+
import org.apache.paimon.types.VarCharType;
33+
34+
import javax.annotation.Nullable;
35+
36+
import java.util.List;
2937

3038
/**
3139
* An implementation of {@link InternalRow} which provides a default value for the underlying {@link
@@ -193,4 +201,42 @@ public Variant getVariant(int pos) {
193201
public static DefaultValueRow from(InternalRow defaultValueRow) {
194202
return new DefaultValueRow(defaultValueRow);
195203
}
204+
205+
@Nullable
206+
public static DefaultValueRow create(RowType rowType) {
207+
List<DataField> fields = rowType.getFields();
208+
GenericRow row = new GenericRow(fields.size());
209+
boolean containsDefaultValue = false;
210+
for (int i = 0; i < fields.size(); i++) {
211+
DataField dataField = fields.get(i);
212+
String defaultValueStr = dataField.defaultValue();
213+
if (defaultValueStr == null) {
214+
continue;
215+
}
216+
217+
containsDefaultValue = true;
218+
@SuppressWarnings("unchecked")
219+
CastExecutor<Object, Object> resolve =
220+
(CastExecutor<Object, Object>)
221+
CastExecutors.resolve(VarCharType.STRING_TYPE, dataField.type());
222+
223+
if (resolve == null) {
224+
throw new RuntimeException(
225+
"Default value do not support the type of " + dataField.type());
226+
}
227+
228+
if (defaultValueStr.startsWith("'") && defaultValueStr.endsWith("'")) {
229+
defaultValueStr = defaultValueStr.substring(1, defaultValueStr.length() - 1);
230+
}
231+
232+
Object defaultValue = resolve.cast(BinaryString.fromString(defaultValueStr));
233+
row.setField(i, defaultValue);
234+
}
235+
236+
if (!containsDefaultValue) {
237+
return null;
238+
}
239+
240+
return DefaultValueRow.from(row);
241+
}
196242
}

paimon-core/src/main/java/org/apache/paimon/operation/DefaultValueAssigner.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,6 @@ public DefaultValueAssigner handleReadRowType(RowType readRowType) {
6969
return this;
7070
}
7171

72-
public boolean needToAssign() {
73-
return needToAssign;
74-
}
75-
7672
/** assign default value for column which value is null. */
7773
public RecordReader<InternalRow> assignFieldsDefaultValue(RecordReader<InternalRow> reader) {
7874
if (!needToAssign) {

0 commit comments

Comments
 (0)