Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 8e9fd74

Browse files
authored
Adapt doris schema type (#1495)
<!-- A note on testing your PR --> <!-- Basic unit test run is executed against each commit in the PR. If you want to run a full integration test suite, you can trigger it by commenting with '/run-integration-tests' or '/run-it' -->
1 parent e95d49d commit 8e9fd74

File tree

8 files changed

+160
-43
lines changed

8 files changed

+160
-43
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package clickhouse
4+
5+
import (
6+
"reflect"
7+
"time"
8+
)
9+
10+
// this is catch all type for all types we do not exlicitly support
11+
type UnknownType struct{}
12+
13+
type ClickhouseTypeResolver struct{}
14+
15+
func (r *ClickhouseTypeResolver) ResolveType(clickHouseTypeName string) reflect.Type {
16+
switch clickHouseTypeName {
17+
case "String", "LowCardinality(String)", "UUID", "FixedString":
18+
return reflect.TypeOf("")
19+
case "DateTime64", "DateTime", "Date", "DateTime64(3)":
20+
return reflect.TypeOf(time.Time{})
21+
case "UInt8", "UInt16", "UInt32", "UInt64":
22+
return reflect.TypeOf(uint64(0))
23+
case "Int8", "Int16", "Int32":
24+
return reflect.TypeOf(int32(0))
25+
case "Int64":
26+
return reflect.TypeOf(int64(0))
27+
case "Float32", "Float64":
28+
return reflect.TypeOf(float64(0))
29+
case "Point":
30+
return reflect.TypeOf(Point{})
31+
case "Bool":
32+
return reflect.TypeOf(true)
33+
case "JSON":
34+
return reflect.TypeOf(map[string]interface{}{})
35+
case "Map(String, Nullable(String))", "Map(String, String)", "Map(LowCardinality(String), String)", "Map(LowCardinality(String), Nullable(String))":
36+
return reflect.TypeOf(map[string]string{})
37+
case "Unknown":
38+
return reflect.TypeOf(UnknownType{})
39+
}
40+
return nil
41+
}

platform/database_common/schema.go

Lines changed: 46 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"github.com/QuesmaOrg/quesma/platform/clickhouse"
88
"github.com/QuesmaOrg/quesma/platform/config"
9+
"github.com/QuesmaOrg/quesma/platform/doris"
910
"github.com/QuesmaOrg/quesma/platform/logger"
1011
"github.com/QuesmaOrg/quesma/platform/schema"
1112
"github.com/QuesmaOrg/quesma/platform/util"
@@ -68,6 +69,13 @@ type (
6869
Origin schema.FieldSource // TODO this field is just added to have way to forward information to the schema registry and should be considered as a technical debt
6970
}
7071
DateTimeType int
72+
InstanceType int
73+
)
74+
75+
const (
76+
DorisInstance InstanceType = iota
77+
ClickHouseInstance
78+
UnknownInstance
7179
)
7280

7381
const (
@@ -155,6 +163,18 @@ func (t MultiValueType) StringWithNullable() string {
155163
return t.String()
156164
}
157165

166+
func GetInstanceType(instanceName string) InstanceType {
167+
switch instanceName {
168+
case "clickhouse":
169+
return DorisInstance
170+
case "doris":
171+
return ClickHouseInstance
172+
default:
173+
logger.Fatal().Msgf("unknown instance name: %s", instanceName)
174+
return UnknownInstance
175+
}
176+
}
177+
158178
func (t MultiValueType) createTableString(indentLvl int) string {
159179
var sb strings.Builder
160180
sb.WriteString(t.Name + "\n" + util.Indent(indentLvl) + "(\n")
@@ -211,44 +231,41 @@ func (t MultiValueType) GetColumn(name string) *Column {
211231
}
212232

213233
func NewBaseType(clickHouseTypeName string) BaseType {
214-
var GoType = ResolveType(clickHouseTypeName)
234+
// TODO: currently, NewBaseType is only used in tests or create table or insert, not in Doris's code, so the ClickHouse schema is used here.
235+
var r TypeResolver = &clickhouse.ClickhouseTypeResolver{}
236+
var GoType = r.ResolveType(clickHouseTypeName)
215237
if GoType == nil {
216238
// default, probably good for dates, etc.
217239
GoType = reflect.TypeOf("")
218240
}
219241
return BaseType{Name: clickHouseTypeName, GoType: GoType}
220242
}
221243

222-
// this is catch all type for all types we do not exlicitly support
223-
type UnknownType struct{}
224-
225-
func ResolveType(clickHouseTypeName string) reflect.Type {
226-
switch clickHouseTypeName {
227-
case "String", "LowCardinality(String)", "UUID", "FixedString":
228-
return reflect.TypeOf("")
229-
case "DateTime64", "DateTime", "Date", "DateTime64(3)":
230-
return reflect.TypeOf(time.Time{})
231-
case "UInt8", "UInt16", "UInt32", "UInt64":
232-
return reflect.TypeOf(uint64(0))
233-
case "Int8", "Int16", "Int32":
234-
return reflect.TypeOf(int32(0))
235-
case "Int64":
236-
return reflect.TypeOf(int64(0))
237-
case "Float32", "Float64":
238-
return reflect.TypeOf(float64(0))
239-
case "Point":
240-
return reflect.TypeOf(clickhouse.Point{})
241-
case "Bool":
242-
return reflect.TypeOf(true)
243-
case "JSON":
244-
return reflect.TypeOf(map[string]interface{}{})
245-
case "Map(String, Nullable(String))", "Map(String, String)", "Map(LowCardinality(String), String)", "Map(LowCardinality(String), Nullable(String))":
246-
return reflect.TypeOf(map[string]string{})
247-
case "Unknown":
248-
return reflect.TypeOf(UnknownType{})
244+
func NewBaseTypeWithInstanceName(typeName string, instanceType InstanceType) BaseType {
245+
r := GetTypeResolver(instanceType)
246+
var GoType = r.ResolveType(typeName)
247+
if GoType == nil {
248+
// default, probably good for dates, etc.
249+
GoType = reflect.TypeOf("")
250+
}
251+
return BaseType{Name: typeName, GoType: GoType}
252+
}
253+
254+
func GetTypeResolver(instanceType InstanceType) TypeResolver {
255+
var r TypeResolver
256+
switch instanceType {
257+
case DorisInstance:
258+
r = &doris.DorisTypeResolver{}
259+
case ClickHouseInstance:
260+
r = &clickhouse.ClickhouseTypeResolver{}
261+
default:
262+
logger.Warn().Msgf("unknown instance type: %v", instanceType)
249263
}
264+
return r
265+
}
250266

251-
return nil
267+
type TypeResolver interface {
268+
ResolveType(typeName string) reflect.Type
252269
}
253270

254271
// 'value': value of a field, from unmarshalled JSON

platform/database_common/schema_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ func TestGetDateTimeType(t *testing.T) {
1717
"timestamp2": {Name: "timestamp2", Type: NewBaseType("DateTime('UTC')")},
1818
"timestamp64_1": {Name: "timestamp64_1", Type: NewBaseType("DateTime64")},
1919
"timestamp64_2": {Name: "timestamp64_2", Type: NewBaseType("DateTime64(3, 'UTC')")},
20+
"datetime1": {Name: "datetime1", Type: NewBaseType("datetime")},
21+
"date1": {Name: "date1", Type: NewBaseType("date")},
2022
},
2123
Config: NewChTableConfigTimestampStringAttr(),
2224
}
@@ -25,5 +27,7 @@ func TestGetDateTimeType(t *testing.T) {
2527
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_1", true))
2628
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_2", true))
2729
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, timestampFieldName, true)) // default, created by us
30+
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "datetime1", true))
31+
assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "date1", true))
2832
assert.Equal(t, Invalid, table.GetDateTimeType(ctx, "non-existent", false))
2933
}

platform/database_common/table.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ func (t *Table) GetDateTimeType(ctx context.Context, fieldName string, dateInSch
5858
if col, ok := t.Cols[fieldName]; ok {
5959
typeName := col.Type.String()
6060
// hasPrefix, not equal, because we can have DateTime64(3) and we want to catch it
61-
if strings.HasPrefix(typeName, "DateTime64") {
61+
if strings.HasPrefix(typeName, "DateTime64") || strings.HasPrefix(typeName, "datetime") {
6262
return DateTime64
6363
}
64-
if strings.HasPrefix(typeName, "DateTime") {
64+
if strings.HasPrefix(typeName, "DateTime") || strings.HasPrefix(typeName, "date") {
6565
return DateTime
6666
}
6767
}

platform/database_common/table_discovery.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ func (td *tableDiscovery) autoConfigureTables(tables map[string]map[string]colum
380380
}
381381

382382
func (td *tableDiscovery) populateTableDefinitions(configuredTables map[string]discoveredTable, databaseName string, cfg *config.QuesmaConfiguration) {
383+
instanceType := GetInstanceType(td.dbConnPool.InstanceName())
383384

384385
tableMap := NewTableMap()
385386
for tableName, resTable := range configuredTables {
@@ -393,7 +394,7 @@ func (td *tableDiscovery) populateTableDefinitions(configuredTables map[string]d
393394
}
394395
}
395396

396-
column := ResolveColumn(col, columnMeta.colType)
397+
column := ResolveColumn(col, columnMeta.colType, instanceType)
397398
if column != nil {
398399
column.Comment = columnMeta.comment
399400
column.Origin = columnMeta.origin
@@ -481,12 +482,13 @@ func (td *tableDiscovery) TableDefinitions() *TableMap {
481482
return td.tableDefinitions.Load()
482483
}
483484

484-
func ResolveColumn(colName, colType string) *Column {
485+
func ResolveColumn(colName, colType string, instanceType InstanceType) *Column {
485486
isNullable := false
486487
if isNullableType(colType) {
487488
isNullable = true
488489
colType = strings.TrimSuffix(strings.TrimPrefix(colType, "Nullable("), ")")
489490
}
491+
r := GetTypeResolver(instanceType)
490492

491493
if isArrayType(colType) {
492494
arrayType := strings.TrimSuffix(strings.TrimPrefix(colType, "Array("), ")")
@@ -495,7 +497,7 @@ func ResolveColumn(colName, colType string) *Column {
495497
arrayType = strings.TrimSuffix(strings.TrimPrefix(arrayType, "Nullable("), ")")
496498
}
497499
if isArrayType(arrayType) {
498-
innerColumn := ResolveColumn("inner", arrayType)
500+
innerColumn := ResolveColumn("inner", arrayType, instanceType)
499501
if innerColumn == nil {
500502
logger.Warn().Msgf("invalid inner array type for column %s, %s", colName, colType)
501503
return nil
@@ -508,7 +510,7 @@ func ResolveColumn(colName, colType string) *Column {
508510
},
509511
}
510512
}
511-
GoType := ResolveType(arrayType)
513+
GoType := r.ResolveType(arrayType)
512514
if GoType != nil {
513515
return &Column{
514516
Name: colName,
@@ -518,7 +520,7 @@ func ResolveColumn(colName, colType string) *Column {
518520
},
519521
}
520522
} else if isTupleType(arrayType) {
521-
tupleColumn := ResolveColumn("Tuple", arrayType)
523+
tupleColumn := ResolveColumn("Tuple", arrayType, instanceType)
522524
if tupleColumn == nil {
523525
logger.Warn().Msgf("invalid tuple type for column %s, %s", colName, colType)
524526
return nil
@@ -558,7 +560,7 @@ func ResolveColumn(colName, colType string) *Column {
558560
Name: colName,
559561
Type: BaseType{
560562
Name: "Int32",
561-
GoType: NewBaseType("Int32").GoType,
563+
GoType: NewBaseTypeWithInstanceName("Int32", instanceType).GoType,
562564
},
563565
}
564566
}
@@ -567,12 +569,12 @@ func ResolveColumn(colName, colType string) *Column {
567569
if strings.HasPrefix(colType, "DateTime") {
568570
colType = removePrecision(colType)
569571
}
570-
if GoType := ResolveType(colType); GoType != nil {
572+
if GoType := r.ResolveType(colType); GoType != nil {
571573
return &Column{
572574
Name: colName,
573575
Type: BaseType{
574576
Name: colType,
575-
GoType: NewBaseType(colType).GoType,
577+
GoType: NewBaseTypeWithInstanceName(colType, instanceType).GoType,
576578
Nullable: isNullable,
577579
},
578580
}
@@ -583,7 +585,7 @@ func ResolveColumn(colName, colType string) *Column {
583585
Name: colName,
584586
Type: BaseType{
585587
Name: typeName,
586-
GoType: NewBaseType("Unknown").GoType,
588+
GoType: NewBaseTypeWithInstanceName("Unknown", instanceType).GoType,
587589
Nullable: isNullable,
588590
},
589591
}

platform/database_common/table_discovery_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ func Test_resolveColumn(t *testing.T) {
146146

147147
for i, tt := range tests {
148148
t.Run(util.PrettyTestName(tt.name, i), func(t *testing.T) {
149-
assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType)
149+
assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType, ClickHouseInstance), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType)
150150
})
151151
}
152152
}
@@ -300,7 +300,7 @@ func Test_resolveColumn_Nullable(t *testing.T) {
300300

301301
for i, tt := range tests {
302302
t.Run(util.PrettyTestName(tt.name, i), func(t *testing.T) {
303-
assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType)
303+
assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType, ClickHouseInstance), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType)
304304
})
305305
}
306306
}

platform/doris/doris_schema.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright Quesma, licensed under the Elastic License 2.0.
2+
// SPDX-License-Identifier: Elastic-2.0
3+
package doris
4+
5+
import (
6+
"reflect"
7+
"strings"
8+
"time"
9+
)
10+
11+
// this is catch all type for all types we do not exlicitly support
12+
type UnknownType struct{}
13+
14+
type DorisTypeResolver struct{}
15+
16+
func (r *DorisTypeResolver) ResolveType(dorisTypeName string) reflect.Type {
17+
dorisTypeName = strings.ToLower(dorisTypeName)
18+
switch dorisTypeName {
19+
case "char", "varchar", "string", "text":
20+
return reflect.TypeOf("")
21+
case "date", "datetime", "datev2", "datetimev2":
22+
return reflect.TypeOf(time.Time{})
23+
case "tinyint", "smallint", "int":
24+
return reflect.TypeOf(int32(0))
25+
case "bigint":
26+
return reflect.TypeOf(int64(0))
27+
case "largeint":
28+
return reflect.TypeOf("") // LargeInt is typically handled as string due to size
29+
case "boolean":
30+
return reflect.TypeOf(true)
31+
case "float":
32+
return reflect.TypeOf(float32(0))
33+
case "double":
34+
return reflect.TypeOf(float64(0))
35+
case "decimal", "decimalv2":
36+
return reflect.TypeOf("") // Decimals often handled as strings for precision
37+
case "json":
38+
return reflect.TypeOf(map[string]interface{}{})
39+
case "hll":
40+
return reflect.TypeOf([]byte{}) // HLL is a binary type
41+
case "bitmap":
42+
return reflect.TypeOf([]byte{}) // Bitmap is also binary
43+
case "array":
44+
return reflect.TypeOf([]interface{}{})
45+
case "map":
46+
return reflect.TypeOf(map[string]interface{}{})
47+
case "struct":
48+
return reflect.TypeOf(map[string]interface{}{})
49+
case "Unknown":
50+
return reflect.TypeOf(UnknownType{})
51+
}
52+
return nil
53+
}

platform/ingest/processor.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ func (ip *IngestProcessor) createTableObject(tableName string, columnsFromJson [
230230
// Remove DEFAULT clause from the type
231231
colType = strings.Split(colType, " DEFAULT")[0]
232232
}
233-
resCol := database_common.ResolveColumn(name, colType)
233+
resCol := database_common.ResolveColumn(name, colType, database_common.ClickHouseInstance)
234234
return resCol.Type
235235
}
236236

0 commit comments

Comments
 (0)