Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit b365143

Browse files
authored
Elasticsearch schema consistency fixes, query fixes (#1371)
This PR fixes two issues:
- Ensuring consistency by aligning behavior with Elasticsearch (`field_caps` inferred types).
- Preventing query failures.

<img width="1681" alt="image" src="https://github.com/user-attachments/assets/8f6a1f26-0862-40e4-b336-4b5779f8882c" />
<img width="1686" alt="image" src="https://github.com/user-attachments/assets/b355b475-cf59-4b04-93a6-b6c993c24dd0" />

<!-- A note on testing your PR -->
<!-- Basic unit test run is executed against each commit in the PR. If you want to run a full integration test suite, you can trigger it by commenting with '/run-integration-tests' -->
1 parent c1f1979 commit b365143

File tree

7 files changed

+36
-41
lines changed

7 files changed

+36
-41
lines changed

platform/clickhouse/type_adapter.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ func (c SchemaTypeAdapter) Convert(s string) (schema.QuesmaType, bool) {
2222
}
2323

2424
switch s {
25-
case "String", "LowCardinality(String)", "UUID", "FixedString":
25+
case "String":
26+
return schema.QuesmaTypeText, true
27+
case "LowCardinality(String)", "UUID", "FixedString":
2628
return schema.QuesmaTypeKeyword, true
2729
case "Int", "Int8", "Int16", "Int32", "Int64":
2830
return schema.QuesmaTypeLong, true

platform/frontend_connectors/schema_transformer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,7 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
10031003
{TransformationName: "PhysicalFromExpressionTransformation", Transformation: s.applyPhysicalFromExpression},
10041004
{TransformationName: "WildcardExpansion", Transformation: s.applyWildcardExpansion},
10051005
{TransformationName: "RuntimeMappings", Transformation: s.applyRuntimeMappings},
1006+
{TransformationName: "FieldMapSyntaxTransformation", Transformation: s.applyFieldMapSyntax},
10061007
{TransformationName: "AliasColumnsTransformation", Transformation: s.applyAliasColumns},
10071008

10081009
// Section 2: generic schema based transformations
@@ -1011,7 +1012,6 @@ func (s *SchemaCheckPass) Transform(queries []*model.Query) ([]*model.Query, err
10111012
// because WildcardExpansion expands the wildcard to all fields
10121013
// and columns are expanded as PublicFieldName, so we need to encode them
10131014
// or in other words use internal field names
1014-
{TransformationName: "FieldMapSyntaxTransformation", Transformation: s.applyFieldMapSyntax},
10151015
{TransformationName: "FieldEncodingTransformation", Transformation: s.applyFieldEncoding},
10161016
{TransformationName: "FullTextFieldTransformation", Transformation: s.applyFullTextField},
10171017
{TransformationName: "TimestampFieldTransformation", Transformation: s.applyTimestampField},

platform/frontend_connectors/schema_transformer_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1076,7 +1076,7 @@ func TestFullTextFields(t *testing.T) {
10761076
var schemaColumns []schema.Column
10771077

10781078
for _, col := range columns {
1079-
schemaColumns = append(schemaColumns, schema.Column{Name: col, Type: "String"})
1079+
schemaColumns = append(schemaColumns, schema.Column{Name: col, Type: "LowCardinality(String)"})
10801080
}
10811081

10821082
columnMap := make(map[string]schema.Column)

platform/functionality/field_capabilities/field_caps_test.go

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,12 @@ func TestFieldCapsMultipleIndexes(t *testing.T) {
211211
Tables: map[schema.IndexName]schema.Schema{
212212
"logs-1": {
213213
Fields: map[schema.FieldName]schema.Field{
214-
"foo.bar1": {PropertyName: "foo.bar1", InternalPropertyName: "foo.bar1", Type: schema.QuesmaTypeKeyword},
214+
"foo.bar1": {PropertyName: "foo.bar1", InternalPropertyName: "foo.bar1", Type: schema.QuesmaTypeText},
215215
},
216216
},
217217
"logs-2": {
218218
Fields: map[schema.FieldName]schema.Field{
219-
"foo.bar2": {PropertyName: "foo.bar2", InternalPropertyName: "foo.bar2", Type: schema.QuesmaTypeKeyword},
219+
"foo.bar2": {PropertyName: "foo.bar2", InternalPropertyName: "foo.bar2", Type: schema.QuesmaTypeText},
220220
},
221221
},
222222
},
@@ -225,68 +225,61 @@ func TestFieldCapsMultipleIndexes(t *testing.T) {
225225
expectedResp, err := json.MarshalIndent([]byte(`{
226226
"fields": {
227227
"foo.bar1": {
228-
"keyword": {
229-
"aggregatable": true,
230-
"searchable": true,
228+
"text": {
229+
"aggregatable": false,
230+
"indices": ["logs-1"],
231231
"metadata_field": false,
232-
"type": "keyword",
233-
"indices": ["logs-1"]
232+
"searchable": true,
233+
"type": "text"
234234
}
235235
},
236-
"foo.bar1.keyword": {
236+
"foo.bar1.keyword": {
237237
"keyword": {
238238
"aggregatable": true,
239-
"searchable": true,
239+
"indices": ["logs-1"],
240240
"metadata_field": false,
241-
"type": "keyword",
242-
"indices": ["logs-1"]
241+
"searchable": true,
242+
"type": "keyword"
243243
}
244244
},
245245
"foo.bar1.text": {
246246
"text": {
247247
"aggregatable": false,
248-
"indices": [
249-
"logs-1"
250-
],
248+
"indices": ["logs-1"],
251249
"metadata_field": false,
252250
"searchable": true,
253251
"type": "text"
254252
}
255253
},
256254
"foo.bar2": {
257-
"keyword": {
258-
"aggregatable": true,
259-
"searchable": true,
255+
"text": {
256+
"aggregatable": false,
257+
"indices": ["logs-2"],
260258
"metadata_field": false,
261-
"type": "keyword",
262-
"indices": ["logs-2"]
259+
"searchable": true,
260+
"type": "text"
263261
}
264262
},
265-
"foo.bar2.keyword": {
263+
"foo.bar2.keyword": {
266264
"keyword": {
267265
"aggregatable": true,
268-
"searchable": true,
266+
"indices": ["logs-2"],
269267
"metadata_field": false,
270-
"type": "keyword",
271-
"indices": ["logs-2"]
268+
"searchable": true,
269+
"type": "keyword"
272270
}
273271
},
274272
"foo.bar2.text": {
275273
"text": {
276274
"aggregatable": false,
277-
"indices": [
278-
"logs-2"
279-
],
275+
"indices": ["logs-2"],
280276
"metadata_field": false,
281277
"searchable": true,
282278
"type": "text"
283279
}
284280
}
285281
},
286-
"indices": [
287-
"logs-1",
288-
"logs-2"
289-
]
282+
"indices": ["logs-1", "logs-2"]
290283
}
291284
`), "", " ")
292285
assert.NoError(t, err)

platform/schema/registry.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,13 +296,11 @@ func (s *schemaRegistry) populateAliases(indexConfiguration config.IndexConfigur
296296
}
297297

298298
func (s *schemaRegistry) populateSchemaFromTableDefinition(definitions map[string]Table, indexName string, fields map[FieldName]Field, internalToPublicFieldsEncodings map[EncodedFieldName]string) (existsInDataSource bool) {
299-
300299
tableDefinition, found := definitions[indexName]
301300
if found {
302301
logger.Debug().Msgf("loading schema for table %s", indexName)
303302

304303
for _, column := range tableDefinition.Columns {
305-
306304
var propertyName FieldName
307305
if internalField, ok := internalToPublicFieldsEncodings[EncodedFieldName(column.Name)]; ok {
308306
propertyName = FieldName(internalField)
@@ -331,7 +329,7 @@ func (s *schemaRegistry) populateSchemaFromTableDefinition(definitions map[strin
331329
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: quesmaType, Origin: column.Origin}
332330
} else {
333331
logger.Debug().Msgf("type %s not supported, falling back to keyword", column.Type)
334-
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: QuesmaTypeKeyword}
332+
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: QuesmaTypeKeyword, Origin: column.Origin}
335333
}
336334
} else {
337335
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: existing.Type, Origin: existing.Origin}

platform/schema/registry_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func Test_schemaRegistry_FindSchema(t *testing.T) {
4646
}},
4747
tableName: "some_table",
4848
want: schema.NewSchema(map[schema.FieldName]schema.Field{
49-
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeKeyword, InternalPropertyType: "String"},
49+
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeText, InternalPropertyType: "String"},
5050
"event_date": {PropertyName: "event_date", InternalPropertyName: "event_date", Type: schema.QuesmaTypeTimestamp, InternalPropertyType: "DateTime64"},
5151
"count": {PropertyName: "count", InternalPropertyName: "count", Type: schema.QuesmaTypeLong, InternalPropertyType: "Int64"}},
5252
true, ""),
@@ -308,7 +308,7 @@ func Test_schemaRegistry_UpdateDynamicConfiguration(t *testing.T) {
308308
defer s.Stop()
309309

310310
expectedSchema := schema.NewSchema(map[schema.FieldName]schema.Field{
311-
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeKeyword, InternalPropertyType: "String"},
311+
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeText, InternalPropertyType: "String"},
312312
"event_date": {PropertyName: "event_date", InternalPropertyName: "event_date", Type: schema.QuesmaTypeTimestamp, InternalPropertyType: "DateTime64"},
313313
"count": {PropertyName: "count", InternalPropertyName: "count", Type: schema.QuesmaTypeLong, InternalPropertyType: "Int64"}},
314314
true, "")
@@ -328,7 +328,7 @@ func Test_schemaRegistry_UpdateDynamicConfiguration(t *testing.T) {
328328
})
329329

330330
expectedSchema = schema.NewSchema(map[schema.FieldName]schema.Field{
331-
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeKeyword, InternalPropertyType: "String"},
331+
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeText, InternalPropertyType: "String"},
332332
"event_date": {PropertyName: "event_date", InternalPropertyName: "event_date", Type: schema.QuesmaTypeTimestamp, InternalPropertyType: "DateTime64"},
333333
"count": {PropertyName: "count", InternalPropertyName: "count", Type: schema.QuesmaTypeLong, InternalPropertyType: "Int64"},
334334
"new_column": {PropertyName: "new_column", InternalPropertyName: "new_column", Type: schema.QuesmaTypeText, Origin: schema.FieldSourceMapping}},

platform/schema/types.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ var (
2424
QuesmaTypeDate = QuesmaType{Name: "date", Properties: []QuesmaTypeProperty{Searchable, Aggregatable}}
2525
QuesmaTypeFloat = QuesmaType{Name: "float", Properties: []QuesmaTypeProperty{Searchable, Aggregatable}}
2626
QuesmaTypeBoolean = QuesmaType{Name: "boolean", Properties: []QuesmaTypeProperty{Searchable, Aggregatable}}
27-
QuesmaTypeObject = QuesmaType{Name: "object", Properties: []QuesmaTypeProperty{Searchable}}
27+
QuesmaTypeObject = QuesmaType{Name: "object", Properties: []QuesmaTypeProperty{}}
2828
QuesmaTypeArray = QuesmaType{Name: "array", Properties: []QuesmaTypeProperty{Searchable}}
29-
QuesmaTypeMap = QuesmaType{Name: "map", Properties: []QuesmaTypeProperty{Searchable}}
29+
QuesmaTypeMap = QuesmaType{Name: "map", Properties: []QuesmaTypeProperty{}}
3030
QuesmaTypeIp = QuesmaType{Name: "ip", Properties: []QuesmaTypeProperty{Searchable, Aggregatable}}
3131
QuesmaTypePoint = QuesmaType{Name: "point", Properties: []QuesmaTypeProperty{Searchable, Aggregatable}}
3232
QuesmaTypeUnknown = QuesmaType{Name: "unknown", Properties: []QuesmaTypeProperty{Searchable}}
@@ -60,6 +60,8 @@ func (t QuesmaType) String() string {
6060

6161
func ParseQuesmaType(t string) (QuesmaType, bool) {
6262
switch t {
63+
case QuesmaTypeInteger.Name:
64+
return QuesmaTypeInteger, true
6365
case QuesmaTypeText.Name:
6466
return QuesmaTypeText, true
6567
case QuesmaTypeKeyword.Name:

0 commit comments

Comments
 (0)