diff --git a/platform/clickhouse/clickhouse_schema.go b/platform/clickhouse/clickhouse_schema.go new file mode 100644 index 000000000..0145ca9ec --- /dev/null +++ b/platform/clickhouse/clickhouse_schema.go @@ -0,0 +1,41 @@ +// Copyright Quesma, licensed under the Elastic License 2.0. +// SPDX-License-Identifier: Elastic-2.0 +package clickhouse + +import ( + "reflect" + "time" +) + +// this is catch all type for all types we do not exlicitly support +type UnknownType struct{} + +type ClickhouseTypeResolver struct{} + +func (r *ClickhouseTypeResolver) ResolveType(clickHouseTypeName string) reflect.Type { + switch clickHouseTypeName { + case "String", "LowCardinality(String)", "UUID", "FixedString": + return reflect.TypeOf("") + case "DateTime64", "DateTime", "Date", "DateTime64(3)": + return reflect.TypeOf(time.Time{}) + case "UInt8", "UInt16", "UInt32", "UInt64": + return reflect.TypeOf(uint64(0)) + case "Int8", "Int16", "Int32": + return reflect.TypeOf(int32(0)) + case "Int64": + return reflect.TypeOf(int64(0)) + case "Float32", "Float64": + return reflect.TypeOf(float64(0)) + case "Point": + return reflect.TypeOf(Point{}) + case "Bool": + return reflect.TypeOf(true) + case "JSON": + return reflect.TypeOf(map[string]interface{}{}) + case "Map(String, Nullable(String))", "Map(String, String)", "Map(LowCardinality(String), String)", "Map(LowCardinality(String), Nullable(String))": + return reflect.TypeOf(map[string]string{}) + case "Unknown": + return reflect.TypeOf(UnknownType{}) + } + return nil +} diff --git a/platform/database_common/schema.go b/platform/database_common/schema.go index 067023ea2..400a41872 100644 --- a/platform/database_common/schema.go +++ b/platform/database_common/schema.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/QuesmaOrg/quesma/platform/clickhouse" "github.com/QuesmaOrg/quesma/platform/config" + "github.com/QuesmaOrg/quesma/platform/doris" "github.com/QuesmaOrg/quesma/platform/logger" "github.com/QuesmaOrg/quesma/platform/schema" "github.com/QuesmaOrg/quesma/platform/util" @@ -68,6 +69,13 @@ type ( Origin schema.FieldSource // TODO this field is just added to have way to forward information to the schema registry and should be considered as a technical debt } DateTimeType int + InstanceType int +) + +const ( + DorisInstance InstanceType = iota + ClickHouseInstance + UnknownInstance ) const ( @@ -155,6 +163,18 @@ func (t MultiValueType) StringWithNullable() string { return t.String() } +func GetInstanceType(instanceName string) InstanceType { + switch instanceName { + case "clickhouse": + return DorisInstance + case "doris": + return ClickHouseInstance + default: + logger.Fatal().Msgf("unknown instance name: %s", instanceName) + return UnknownInstance + } +} + func (t MultiValueType) createTableString(indentLvl int) string { var sb strings.Builder sb.WriteString(t.Name + "\n" + util.Indent(indentLvl) + "(\n") @@ -211,7 +231,9 @@ func (t MultiValueType) GetColumn(name string) *Column { } func NewBaseType(clickHouseTypeName string) BaseType { - var GoType = ResolveType(clickHouseTypeName) + // TODO: currently, NewBaseType is only used in tests or create table or insert, not in Doris's code, so the ClickHouse schema is used here. + var r TypeResolver = &clickhouse.ClickhouseTypeResolver{} + var GoType = r.ResolveType(clickHouseTypeName) if GoType == nil { // default, probably good for dates, etc. GoType = reflect.TypeOf("") @@ -219,36 +241,31 @@ func NewBaseType(clickHouseTypeName string) BaseType { return BaseType{Name: clickHouseTypeName, GoType: GoType} } -// this is catch all type for all types we do not exlicitly support -type UnknownType struct{} - -func ResolveType(clickHouseTypeName string) reflect.Type { - switch clickHouseTypeName { - case "String", "LowCardinality(String)", "UUID", "FixedString": - return reflect.TypeOf("") - case "DateTime64", "DateTime", "Date", "DateTime64(3)": - return reflect.TypeOf(time.Time{}) - case "UInt8", "UInt16", "UInt32", "UInt64": - return reflect.TypeOf(uint64(0)) - case "Int8", "Int16", "Int32": - return reflect.TypeOf(int32(0)) - case "Int64": - return reflect.TypeOf(int64(0)) - case "Float32", "Float64": - return reflect.TypeOf(float64(0)) - case "Point": - return reflect.TypeOf(clickhouse.Point{}) - case "Bool": - return reflect.TypeOf(true) - case "JSON": - return reflect.TypeOf(map[string]interface{}{}) - case "Map(String, Nullable(String))", "Map(String, String)", "Map(LowCardinality(String), String)", "Map(LowCardinality(String), Nullable(String))": - return reflect.TypeOf(map[string]string{}) - case "Unknown": - return reflect.TypeOf(UnknownType{}) +func NewBaseTypeWithInstanceName(typeName string, instanceType InstanceType) BaseType { + r := GetTypeResolver(instanceType) + var GoType = r.ResolveType(typeName) + if GoType == nil { + // default, probably good for dates, etc. + GoType = reflect.TypeOf("") + } + return BaseType{Name: typeName, GoType: GoType} +} + +func GetTypeResolver(instanceType InstanceType) TypeResolver { + var r TypeResolver + switch instanceType { + case DorisInstance: + r = &doris.DorisTypeResolver{} + case ClickHouseInstance: + r = &clickhouse.ClickhouseTypeResolver{} + default: + logger.Warn().Msgf("unknown instance type: %v", instanceType) } + return r +} - return nil +type TypeResolver interface { + ResolveType(typeName string) reflect.Type } // 'value': value of a field, from unmarshalled JSON diff --git a/platform/database_common/schema_test.go b/platform/database_common/schema_test.go index a411e6edd..5a74e78e7 100644 --- a/platform/database_common/schema_test.go +++ b/platform/database_common/schema_test.go @@ -17,6 +17,8 @@ func TestGetDateTimeType(t *testing.T) { "timestamp2": {Name: "timestamp2", Type: NewBaseType("DateTime('UTC')")}, "timestamp64_1": {Name: "timestamp64_1", Type: NewBaseType("DateTime64")}, "timestamp64_2": {Name: "timestamp64_2", Type: NewBaseType("DateTime64(3, 'UTC')")}, + "datetime1": {Name: "datetime1", Type: NewBaseType("datetime")}, + "date1": {Name: "date1", Type: NewBaseType("date")}, }, Config: NewChTableConfigTimestampStringAttr(), } @@ -25,5 +27,7 @@ func TestGetDateTimeType(t *testing.T) { assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_1", true)) assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_2", true)) assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, timestampFieldName, true)) // default, created by us + assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "datetime1", true)) + assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "date1", true)) assert.Equal(t, Invalid, table.GetDateTimeType(ctx, "non-existent", false)) } diff --git a/platform/database_common/table.go b/platform/database_common/table.go index ce4f1f4d2..5a2fd7ea5 100644 --- a/platform/database_common/table.go +++ b/platform/database_common/table.go @@ -58,10 +58,10 @@ func (t *Table) GetDateTimeType(ctx context.Context, fieldName string, dateInSch if col, ok := t.Cols[fieldName]; ok { typeName := col.Type.String() // hasPrefix, not equal, because we can have DateTime64(3) and we want to catch it - if strings.HasPrefix(typeName, "DateTime64") { + if strings.HasPrefix(typeName, "DateTime64") || strings.HasPrefix(typeName, "datetime") { return DateTime64 } - if strings.HasPrefix(typeName, "DateTime") { + if strings.HasPrefix(typeName, "DateTime") || strings.HasPrefix(typeName, "date") { return DateTime } } diff --git a/platform/database_common/table_discovery.go b/platform/database_common/table_discovery.go index ad3b5d310..a79269ccf 100644 --- a/platform/database_common/table_discovery.go +++ b/platform/database_common/table_discovery.go @@ -380,6 +380,7 @@ func (td *tableDiscovery) autoConfigureTables(tables map[string]map[string]colum } func (td *tableDiscovery) populateTableDefinitions(configuredTables map[string]discoveredTable, databaseName string, cfg *config.QuesmaConfiguration) { + instanceType := GetInstanceType(td.dbConnPool.InstanceName()) tableMap := NewTableMap() for tableName, resTable := range configuredTables { @@ -393,7 +394,7 @@ func (td *tableDiscovery) populateTableDefinitions(configuredTables map[string]d } } - column := ResolveColumn(col, columnMeta.colType) + column := ResolveColumn(col, columnMeta.colType, instanceType) if column != nil { column.Comment = columnMeta.comment column.Origin = columnMeta.origin @@ -481,12 +482,13 @@ func (td *tableDiscovery) TableDefinitions() *TableMap { return td.tableDefinitions.Load() } -func ResolveColumn(colName, colType string) *Column { +func ResolveColumn(colName, colType string, instanceType InstanceType) *Column { isNullable := false if isNullableType(colType) { isNullable = true colType = strings.TrimSuffix(strings.TrimPrefix(colType, "Nullable("), ")") } + r := GetTypeResolver(instanceType) if isArrayType(colType) { arrayType := strings.TrimSuffix(strings.TrimPrefix(colType, "Array("), ")") @@ -495,7 +497,7 @@ func ResolveColumn(colName, colType string) *Column { arrayType = strings.TrimSuffix(strings.TrimPrefix(arrayType, "Nullable("), ")") } if isArrayType(arrayType) { - innerColumn := ResolveColumn("inner", arrayType) + innerColumn := ResolveColumn("inner", arrayType, instanceType) if innerColumn == nil { logger.Warn().Msgf("invalid inner array type for column %s, %s", colName, colType) return nil @@ -508,7 +510,7 @@ func ResolveColumn(colName, colType string) *Column { }, } } - GoType := ResolveType(arrayType) + GoType := r.ResolveType(arrayType) if GoType != nil { return &Column{ Name: colName, @@ -518,7 +520,7 @@ func ResolveColumn(colName, colType string) *Column { }, } } else if isTupleType(arrayType) { - tupleColumn := ResolveColumn("Tuple", arrayType) + tupleColumn := ResolveColumn("Tuple", arrayType, instanceType) if tupleColumn == nil { logger.Warn().Msgf("invalid tuple type for column %s, %s", colName, colType) return nil @@ -558,7 +560,7 @@ func ResolveColumn(colName, colType string) *Column { Name: colName, Type: BaseType{ Name: "Int32", - GoType: NewBaseType("Int32").GoType, + GoType: NewBaseTypeWithInstanceName("Int32", instanceType).GoType, }, } } @@ -567,12 +569,12 @@ func ResolveColumn(colName, colType string) *Column { if strings.HasPrefix(colType, "DateTime") { colType = removePrecision(colType) } - if GoType := ResolveType(colType); GoType != nil { + if GoType := r.ResolveType(colType); GoType != nil { return &Column{ Name: colName, Type: BaseType{ Name: colType, - GoType: NewBaseType(colType).GoType, + GoType: NewBaseTypeWithInstanceName(colType, instanceType).GoType, Nullable: isNullable, }, } @@ -583,7 +585,7 @@ func ResolveColumn(colName, colType string) *Column { Name: colName, Type: BaseType{ Name: typeName, - GoType: NewBaseType("Unknown").GoType, + GoType: NewBaseTypeWithInstanceName("Unknown", instanceType).GoType, Nullable: isNullable, }, } diff --git a/platform/database_common/table_discovery_test.go b/platform/database_common/table_discovery_test.go index 9c2cc4ffa..1d67e4028 100644 --- a/platform/database_common/table_discovery_test.go +++ b/platform/database_common/table_discovery_test.go @@ -146,7 +146,7 @@ func Test_resolveColumn(t *testing.T) { for i, tt := range tests { t.Run(util.PrettyTestName(tt.name, i), func(t *testing.T) { - assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType) + assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType, ClickHouseInstance), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType) }) } } @@ -300,7 +300,7 @@ func Test_resolveColumn_Nullable(t *testing.T) { for i, tt := range tests { t.Run(util.PrettyTestName(tt.name, i), func(t *testing.T) { - assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType) + assert.Equalf(t, tt.want, ResolveColumn(tt.args.colName, tt.args.colType, ClickHouseInstance), "ResolveColumn(%v, %v)", tt.args.colName, tt.args.colType) }) } } diff --git a/platform/doris/doris_schema.go b/platform/doris/doris_schema.go new file mode 100644 index 000000000..707cd34e9 --- /dev/null +++ b/platform/doris/doris_schema.go @@ -0,0 +1,53 @@ +// Copyright Quesma, licensed under the Elastic License 2.0. +// SPDX-License-Identifier: Elastic-2.0 +package doris + +import ( + "reflect" + "strings" + "time" +) + +// this is catch all type for all types we do not exlicitly support +type UnknownType struct{} + +type DorisTypeResolver struct{} + +func (r *DorisTypeResolver) ResolveType(dorisTypeName string) reflect.Type { + dorisTypeName = strings.ToLower(dorisTypeName) + switch dorisTypeName { + case "char", "varchar", "string", "text": + return reflect.TypeOf("") + case "date", "datetime", "datev2", "datetimev2": + return reflect.TypeOf(time.Time{}) + case "tinyint", "smallint", "int": + return reflect.TypeOf(int32(0)) + case "bigint": + return reflect.TypeOf(int64(0)) + case "largeint": + return reflect.TypeOf("") // LargeInt is typically handled as string due to size + case "boolean": + return reflect.TypeOf(true) + case "float": + return reflect.TypeOf(float32(0)) + case "double": + return reflect.TypeOf(float64(0)) + case "decimal", "decimalv2": + return reflect.TypeOf("") // Decimals often handled as strings for precision + case "json": + return reflect.TypeOf(map[string]interface{}{}) + case "hll": + return reflect.TypeOf([]byte{}) // HLL is a binary type + case "bitmap": + return reflect.TypeOf([]byte{}) // Bitmap is also binary + case "array": + return reflect.TypeOf([]interface{}{}) + case "map": + return reflect.TypeOf(map[string]interface{}{}) + case "struct": + return reflect.TypeOf(map[string]interface{}{}) + case "Unknown": + return reflect.TypeOf(UnknownType{}) + } + return nil +} diff --git a/platform/ingest/processor.go b/platform/ingest/processor.go index 8951dd6e9..7c744b334 100644 --- a/platform/ingest/processor.go +++ b/platform/ingest/processor.go @@ -230,7 +230,7 @@ func (ip *IngestProcessor) createTableObject(tableName string, columnsFromJson [ // Remove DEFAULT clause from the type colType = strings.Split(colType, " DEFAULT")[0] } - resCol := database_common.ResolveColumn(name, colType) + resCol := database_common.ResolveColumn(name, colType, database_common.ClickHouseInstance) return resCol.Type }