Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 1a8a78b

Browse files
committed
Fixing quesma common table dynamic mapping case
1 parent 654b6c7 commit 1a8a78b

File tree

6 files changed

+80
-34
lines changed

6 files changed

+80
-34
lines changed

quesma/ingest/alter_table_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func TestAlterTable(t *testing.T) {
5151

5252
ip := newIngestProcessorWithEmptyTableMap(fieldsMap, &config.QuesmaConfiguration{})
5353
for i := range rowsToInsert {
54-
alter, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(table, types.MustJSON(rowsToInsert[i]), nil, chConfig, encodings)
54+
alter, _, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(table, types.MustJSON(rowsToInsert[i]), nil, chConfig, encodings)
5555
assert.NoError(t, err)
5656
insert, err := generateInsertJson(nonSchemaFields, onlySchemaFields)
5757
assert.Equal(t, expectedInsert[i], insert)
@@ -130,7 +130,7 @@ func TestAlterTableHeuristic(t *testing.T) {
130130

131131
assert.Equal(t, int64(0), ip.ingestCounter)
132132
for i := range rowsToInsert {
133-
_, _, _, err := ip.GenerateIngestContent(table, types.MustJSON(rowsToInsert[i]), nil, chConfig, encodings)
133+
_, _, _, _, err := ip.GenerateIngestContent(table, types.MustJSON(rowsToInsert[i]), nil, chConfig, encodings)
134134
assert.NoError(t, err)
135135
}
136136
assert.Equal(t, tc.expected, len(table.Cols))

quesma/ingest/processor.go

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,13 @@ func getAttributesByArrayName(arrayName string,
286286
return attributes
287287
}
288288

289+
// AlterDDL captures, in structured form, the pieces of a column-level ALTER
// TABLE that generateNewColumns emits as SQL strings for one attribute key.
// generateNewColumns stores one AlterDDL per attribute key in the map it
// returns, and processInsertQueryInternal looks fields up in that map by
// their internal column name.
type AlterDDL struct {
290+
tableName string // name of the table being altered
291+
columnName string // name of the column; also used as the map key by generateNewColumns
292+
columnType string // column type passed alongside the column name
293+
comment string // text used in the COMMENT COLUMN statement for this column
294+
}
295+
289296
// This function generates ALTER TABLE commands for adding new columns
290297
// to the table based on the attributesMap and the table name
291298
// AttributesMap contains the attributes that are not part of the schema
@@ -295,12 +302,12 @@ func (ip *IngestProcessor) generateNewColumns(
295302
attrsMap map[string][]interface{},
296303
table *chLib.Table,
297304
alteredAttributesIndexes []int,
298-
encodings map[schema.FieldEncodingKey]schema.EncodedFieldName) []string {
305+
encodings map[schema.FieldEncodingKey]schema.EncodedFieldName) ([]string, map[string]AlterDDL) {
299306
var alterCmd []string
307+
alterDDLMap := make(map[string]AlterDDL)
300308
attrKeys := getAttributesByArrayName(chLib.DeprecatedAttributesKeyColumn, attrsMap)
301309
attrTypes := getAttributesByArrayName(chLib.DeprecatedAttributesValueType, attrsMap)
302310
var deleteIndexes []int
303-
304311
reverseMap := reverseFieldEncoding(encodings, table.Name)
305312

306313
// HACK Alert:
@@ -339,6 +346,7 @@ func (ip *IngestProcessor) generateNewColumns(
339346
alterCmd = append(alterCmd, alterTable)
340347

341348
alterColumn := fmt.Sprintf("ALTER TABLE \"%s\" COMMENT COLUMN \"%s\" '%s'", table.Name, attrKeys[i], comment)
349+
alterDDLMap[attrKeys[i]] = AlterDDL{tableName: table.Name, columnName: attrKeys[i], columnType: columnType, comment: comment}
342350
alterCmd = append(alterCmd, alterColumn)
343351

344352
deleteIndexes = append(deleteIndexes, i)
@@ -358,7 +366,7 @@ func (ip *IngestProcessor) generateNewColumns(
358366
attrsMap[chLib.DeprecatedAttributesValueType] = append(attrsMap[chLib.DeprecatedAttributesValueType][:deleteIndexes[i]], attrsMap[chLib.DeprecatedAttributesValueType][deleteIndexes[i]+1:]...)
359367
attrsMap[chLib.DeprecatedAttributesValueColumn] = append(attrsMap[chLib.DeprecatedAttributesValueColumn][:deleteIndexes[i]], attrsMap[chLib.DeprecatedAttributesValueColumn][deleteIndexes[i]+1:]...)
360368
}
361-
return alterCmd
369+
return alterCmd, alterDDLMap
362370
}
363371

364372
// This struct contains the information about the columns that aren't part of the schema
@@ -496,39 +504,39 @@ func (ip *IngestProcessor) GenerateIngestContent(table *chLib.Table,
496504
data types.JSON,
497505
inValidJson types.JSON,
498506
config *chLib.ChTableConfig,
499-
encodings map[schema.FieldEncodingKey]schema.EncodedFieldName) ([]string, types.JSON, []NonSchemaField, error) {
507+
encodings map[schema.FieldEncodingKey]schema.EncodedFieldName) ([]string, map[string]AlterDDL, types.JSON, []NonSchemaField, error) {
500508

501509
jsonAsBytesSlice, err := json.Marshal(data)
502510

503511
if err != nil {
504-
return nil, nil, nil, err
512+
return nil, nil, nil, nil, err
505513
}
506514

507515
// we find all non-schema fields
508516
jsonMap, err := types.ParseJSON(string(jsonAsBytesSlice))
509517
if err != nil {
510-
return nil, nil, nil, err
518+
return nil, nil, nil, nil, err
511519
}
512520

513521
if len(config.Attributes) == 0 {
514-
return nil, jsonMap, nil, nil
522+
return nil, nil, jsonMap, nil, nil
515523
}
516524

517525
schemaFieldsJson, err := json.Marshal(jsonMap)
518526

519527
if err != nil {
520-
return nil, jsonMap, nil, err
528+
return nil, nil, jsonMap, nil, err
521529
}
522530

523531
mDiff := DifferenceMap(jsonMap, table) // TODO change to DifferenceMap(m, t)
524532

525533
if len(mDiff) == 0 && string(schemaFieldsJson) == string(jsonAsBytesSlice) && len(inValidJson) == 0 { // no need to modify, just insert 'js'
526-
return nil, jsonMap, nil, nil
534+
return nil, nil, jsonMap, nil, nil
527535
}
528536

529537
// check attributes precondition
530538
if len(config.Attributes) <= 0 {
531-
return nil, nil, nil, fmt.Errorf("no attributes config, but received non-schema fields: %s", mDiff)
539+
return nil, nil, nil, nil, fmt.Errorf("no attributes config, but received non-schema fields: %s", mDiff)
532540
}
533541
attrsMap, _ := BuildAttrsMap(mDiff, config)
534542

@@ -538,9 +546,10 @@ func (ip *IngestProcessor) GenerateIngestContent(table *chLib.Table,
538546
// we only want to add fields that are not part of the schema e.g we don't
539547
// have columns for them
540548
var alterCmd []string
549+
alterDDLMap := make(map[string]AlterDDL)
541550
atomic.AddInt64(&ip.ingestCounter, 1)
542551
if ok, alteredAttributesIndexes := ip.shouldAlterColumns(table, attrsMap); ok {
543-
alterCmd = ip.generateNewColumns(attrsMap, table, alteredAttributesIndexes, encodings)
552+
alterCmd, alterDDLMap = ip.generateNewColumns(attrsMap, table, alteredAttributesIndexes, encodings)
544553
}
545554
// If there are some invalid fields, we need to add them to the attributes map
546555
// to not lose them and be able to store them later by
@@ -551,12 +560,12 @@ func (ip *IngestProcessor) GenerateIngestContent(table *chLib.Table,
551560
nonSchemaFields, err := generateNonSchemaFields(attrsMapWithInvalidFields)
552561

553562
if err != nil {
554-
return nil, nil, nil, err
563+
return nil, nil, nil, nil, err
555564
}
556565

557566
onlySchemaFields := RemoveNonSchemaFields(jsonMap, table)
558567

559-
return alterCmd, onlySchemaFields, nonSchemaFields, nil
568+
return alterCmd, alterDDLMap, onlySchemaFields, nonSchemaFields, nil
560569
}
561570

562571
func generateInsertJson(nonSchemaFields []NonSchemaField, onlySchemaFields types.JSON) (string, error) {
@@ -598,7 +607,7 @@ func populateFieldEncodings(jsonData []types.JSON, tableName string) map[schema.
598607
func (ip *IngestProcessor) processInsertQuery(ctx context.Context,
599608
tableName string,
600609
jsonData []types.JSON, transformer jsonprocessor.IngestTransformer,
601-
tableFormatter TableColumNameFormatter, tableDefinitionChangeOnly bool) ([]string, error) {
610+
tableFormatter TableColumNameFormatter, tableDefinitionChangeOnly bool) ([]string, map[string]AlterDDL, error) {
602611
// this is pre ingest transformer
603612
// here we transform the data before it's structure evaluation and insertion
604613
//
@@ -607,7 +616,7 @@ func (ip *IngestProcessor) processInsertQuery(ctx context.Context,
607616
for _, jsonValue := range jsonData {
608617
result, err := preIngestTransformer.Transform(jsonValue)
609618
if err != nil {
610-
return nil, fmt.Errorf("error while rewriting json: %v", err)
619+
return nil, nil, fmt.Errorf("error while rewriting json: %v", err)
611620
}
612621
processed = append(processed, result)
613622
}
@@ -652,7 +661,7 @@ func (ip *IngestProcessor) processInsertQuery(ctx context.Context,
652661
createTableCmd, err = ip.createTableObjectAndAttributes(ctx, createTableCmd, tableConfig, tableName, tableDefinitionChangeOnly)
653662
if err != nil {
654663
logger.ErrorWithCtx(ctx).Msgf("error createTableObjectAndAttributes, can't create table: %v", err)
655-
return nil, err
664+
return nil, nil, err
656665
}
657666
// Set pointer to table after creating it
658667
table = ip.FindTable(tableName)
@@ -662,34 +671,38 @@ func (ip *IngestProcessor) processInsertQuery(ctx context.Context,
662671
tableConfig = table.Config
663672
var jsonsReadyForInsertion []string
664673
var alterCmd []string
674+
alterDDLMapGlobal := make(map[string]AlterDDL)
665675
var preprocessedJsons []types.JSON
666676
var invalidJsons []types.JSON
667677
preprocessedJsons, invalidJsons, err := ip.preprocessJsons(ctx, table.Name, jsonData, transformer)
668678
if err != nil {
669-
return nil, fmt.Errorf("error preprocessJsons: %v", err)
679+
return nil, nil, fmt.Errorf("error preprocessJsons: %v", err)
670680
}
671681
for i, preprocessedJson := range preprocessedJsons {
672-
alter, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(table, preprocessedJson,
682+
alter, alterDDLMap, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(table, preprocessedJson,
673683
invalidJsons[i], tableConfig, encodings)
674684

675685
if err != nil {
676-
return nil, fmt.Errorf("error BuildInsertJson, tablename: '%s' : %v", table.Name, err)
686+
return nil, nil, fmt.Errorf("error BuildInsertJson, tablename: '%s' : %v", table.Name, err)
677687
}
678688
insertJson, err := generateInsertJson(nonSchemaFields, onlySchemaFields)
679689
if err != nil {
680-
return nil, fmt.Errorf("error generatateInsertJson, tablename: '%s' json: '%s': %v", table.Name, PrettyJson(insertJson), err)
690+
return nil, nil, fmt.Errorf("error generatateInsertJson, tablename: '%s' json: '%s': %v", table.Name, PrettyJson(insertJson), err)
681691
}
682692
alterCmd = append(alterCmd, alter...)
693+
for key, value := range alterDDLMap {
694+
alterDDLMapGlobal[key] = value
695+
}
683696
if err != nil {
684-
return nil, fmt.Errorf("error BuildInsertJson, tablename: '%s' json: '%s': %v", table.Name, PrettyJson(insertJson), err)
697+
return nil, nil, fmt.Errorf("error BuildInsertJson, tablename: '%s' json: '%s': %v", table.Name, PrettyJson(insertJson), err)
685698
}
686699
jsonsReadyForInsertion = append(jsonsReadyForInsertion, insertJson)
687700
}
688701

689702
insertValues := strings.Join(jsonsReadyForInsertion, ", ")
690703
insert := fmt.Sprintf("INSERT INTO \"%s\" FORMAT JSONEachRow %s", table.Name, insertValues)
691704

692-
return generateSqlStatements(createTableCmd, alterCmd, insert), nil
705+
return generateSqlStatements(createTableCmd, alterCmd, insert), alterDDLMapGlobal, nil
693706
}
694707

695708
func (lm *IngestProcessor) ProcessInsertQuery(ctx context.Context, tableName string,
@@ -708,39 +721,61 @@ func (lm *IngestProcessor) ProcessInsertQuery(ctx context.Context, tableName str
708721
clonedJsonData = append(clonedJsonData, jsonValue.Clone())
709722
}
710723

711-
err := lm.processInsertQueryInternal(ctx, tableName, clonedJsonData, transformer, tableFormatter, true)
724+
err := lm.processInsertQueryInternal(ctx, tableName, clonedJsonData, transformer, tableFormatter, true, nil, tableName)
712725
if err != nil {
713726
// we ignore an error here, because we want to process the data and don't lose it
714727
logger.ErrorWithCtx(ctx).Msgf("error processing insert query - virtual table schema update: %v", err)
715728
}
716-
729+
sourceIndexSchema := findSchemaPointer(lm.schemaRegistry, tableName)
730+
sourceIndex := tableName
717731
pipeline := jsonprocessor.IngestTransformerPipeline{}
718732
pipeline = append(pipeline, &common_table.IngestAddIndexNameTransformer{IndexName: tableName})
719733
pipeline = append(pipeline, transformer)
720734
tableName = common_table.TableName
721735

722-
err = lm.processInsertQueryInternal(ctx, common_table.TableName, jsonData, pipeline, tableFormatter, false)
736+
err = lm.processInsertQueryInternal(ctx, common_table.TableName, jsonData, pipeline, tableFormatter, false, sourceIndexSchema, sourceIndex)
723737
if err != nil {
724738
return fmt.Errorf("error processing insert query to a common table: %w", err)
725739
}
726740

727741
return nil
728742
}
729743

730-
return lm.processInsertQueryInternal(ctx, tableName, jsonData, transformer, tableFormatter, false)
744+
return lm.processInsertQueryInternal(ctx, tableName, jsonData, transformer, tableFormatter, false, nil, tableName)
731745

732746
}
733747

734748
func (ip *IngestProcessor) processInsertQueryInternal(ctx context.Context, tableName string,
735749
jsonData []types.JSON, transformer jsonprocessor.IngestTransformer,
736-
tableFormatter TableColumNameFormatter, isVirtualTable bool) error {
737-
statements, err := ip.processInsertQuery(ctx, tableName, jsonData, transformer, tableFormatter, isVirtualTable)
750+
tableFormatter TableColumNameFormatter, isVirtualTable bool, sourceIndexSchema *schema.Schema, sourceIndex string) error {
751+
752+
statements, alterDDLMap, err := ip.processInsertQuery(ctx, tableName, jsonData, transformer, tableFormatter, isVirtualTable)
738753
if err != nil {
739754
return err
740755
}
741756

742757
var logVirtualTableDDL bool // maybe this should be a part of the config or sth
743758

759+
// TODO that's a hack, we add columns to quesma-common-table that
760+
// came from mappings instead of ingest
761+
if sourceIndexSchema != nil {
762+
if ip.cfg.IndexConfig[sourceIndex].UseCommonTable && len(alterDDLMap) > 0 {
763+
var columnsFromDynamicMapping []string
764+
for _, field := range sourceIndexSchema.Fields {
765+
if _, ok := alterDDLMap[field.InternalPropertyName.AsString()]; !ok {
766+
if field.Origin == schema.FieldSourceMapping {
767+
columnsFromDynamicMapping = append(columnsFromDynamicMapping, fmt.Sprintf("ALTER TABLE \"%s\" ADD COLUMN IF NOT EXISTS \"%s\" %s", tableName, field.InternalPropertyName, field.InternalPropertyType))
768+
metadata := comment_metadata.NewCommentMetadata()
769+
metadata.Values[comment_metadata.ElasticFieldName] = field.PropertyName.AsString()
770+
comment := metadata.Marshall()
771+
columnsFromDynamicMapping = append(columnsFromDynamicMapping, fmt.Sprintf("ALTER TABLE \"%s\" COMMENT COLUMN \"%s\" '%s'", tableName, field.InternalPropertyName, comment))
772+
}
773+
}
774+
}
775+
statements = append(columnsFromDynamicMapping, statements...)
776+
}
777+
}
778+
744779
if isVirtualTable && logVirtualTableDDL {
745780
for _, statement := range statements {
746781
if strings.HasPrefix(statement, "ALTER") || strings.HasPrefix(statement, "CREATE") {

quesma/ingest/processor_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func TestInsertNonSchemaFieldsToOthers_1(t *testing.T) {
7272
assert.True(t, exists)
7373
f := func(t1, t2 TableMap) {
7474
ip := newIngestProcessorWithEmptyTableMap(fieldsMap, &config.QuesmaConfiguration{})
75-
alter, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(tableName, types.MustJSON(rowToInsert), nil, hasOthersConfig, encodings)
75+
alter, _, onlySchemaFields, nonSchemaFields, err := ip.GenerateIngestContent(tableName, types.MustJSON(rowToInsert), nil, hasOthersConfig, encodings)
7676
assert.NoError(t, err)
7777
j, err := generateInsertJson(nonSchemaFields, onlySchemaFields)
7878
assert.NoError(t, err)

quesma/schema/registry.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ func (s *schemaRegistry) populateSchemaFromDynamicConfiguration(indexName string
117117
continue
118118
}
119119

120-
fields[FieldName(column.Name)] = Field{PropertyName: FieldName(column.Name), InternalPropertyName: FieldName(column.Name), Type: columnType}
120+
fields[FieldName(column.Name)] = Field{PropertyName: FieldName(column.Name), InternalPropertyName: FieldName(column.Name), Type: columnType, Origin: FieldSourceMapping}
121121
}
122122
}
123123

@@ -246,7 +246,7 @@ func (s *schemaRegistry) populateSchemaFromTableDefinition(definitions map[strin
246246
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: QuesmaTypeKeyword}
247247
}
248248
} else {
249-
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: existing.Type}
249+
fields[propertyName] = Field{PropertyName: propertyName, InternalPropertyName: FieldName(column.Name), InternalPropertyType: column.Type, Type: existing.Type, Origin: existing.Origin}
250250
}
251251
}
252252
}

quesma/schema/registry_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ func Test_schemaRegistry_UpdateDynamicConfiguration(t *testing.T) {
319319
"message": {PropertyName: "message", InternalPropertyName: "message", Type: schema.QuesmaTypeKeyword, InternalPropertyType: "String"},
320320
"event_date": {PropertyName: "event_date", InternalPropertyName: "event_date", Type: schema.QuesmaTypeTimestamp, InternalPropertyType: "DateTime64"},
321321
"count": {PropertyName: "count", InternalPropertyName: "count", Type: schema.QuesmaTypeLong, InternalPropertyType: "Int64"},
322-
"new_column": {PropertyName: "new_column", InternalPropertyName: "new_column", Type: schema.QuesmaTypeText}},
322+
"new_column": {PropertyName: "new_column", InternalPropertyName: "new_column", Type: schema.QuesmaTypeText, Origin: schema.FieldSourceMapping}},
323323
true, "")
324324
resultSchema, resultFound = s.FindSchema(schema.TableName(tableName))
325325
assert.True(t, resultFound, "schema not found")

quesma/schema/schema.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@ import (
66
"strings"
77
)
88

9+
// FieldSource is an enum that represents the source of a field in the schema
// It is stored in Field.Origin.
type FieldSource int
11+
12+
const (
13+
// FieldSourceIngest is the zero value (iota starts at 0), so any Field
// built without an explicit Origin reports this source.
FieldSourceIngest FieldSource = iota
14+
// FieldSourceMapping marks fields populated from the dynamic
// configuration (mappings) by the schema registry.
FieldSourceMapping
15+
// FieldSourceAutoDiscovery: presumably for fields discovered from existing
// tables; no use is visible here — TODO confirm.
FieldSourceAutoDiscovery
16+
// FieldSourceStaticConfiguration: presumably for fields declared in static
// configuration; no use is visible here — TODO confirm.
FieldSourceStaticConfiguration
17+
)
18+
919
type (
1020
Schema struct {
1121
Fields map[FieldName]Field
@@ -23,6 +33,7 @@ type (
2333
InternalPropertyName FieldName
2434
InternalPropertyType string
2535
Type QuesmaType
36+
Origin FieldSource
2637
}
2738
TableName string
2839
FieldName string

0 commit comments

Comments
 (0)