Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit ccccbf9

Browse files
pdelewski and mieciu authored
Ingest parser fallback (#1428)
<!-- A note on testing your PR --> <!-- Basic unit test run is executed against each commit in the PR. If you want to run a full integration test suite, you can trigger it by commenting with '/run-integration-tests' or '/run-it' --> --------- Co-authored-by: przemyslaw <[email protected]>
1 parent 7614a7e commit ccccbf9

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

.github/workflows/integration-tests.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ jobs:
4646
uses: ./.github/workflows/build-quesma-docker-image.yml
4747
with:
4848
REF: ${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }}
49+
VERSION: ${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }}
4950

5051
integration-test-run:
5152
runs-on: ubuntu-latest
@@ -75,7 +76,9 @@ jobs:
7576
docker image ls -a
7677
7778
- name: Set environment variable
78-
run: echo "EXECUTING_ON_GITHUB_CI=true" >> $GITHUB_ENV
79+
run: |
80+
echo "EXECUTING_ON_GITHUB_CI=true" >> $GITHUB_ENV
81+
echo "QUESMA_IT_VERSION=${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }}" >> $GITHUB_ENV
7982
8083
- name: Get last commit author
8184
id: get_author

ci/it/testcases/utils.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"fmt"
1010
"github.com/docker/docker/api/types/container"
11+
"github.com/docker/go-connections/nat"
1112
"github.com/testcontainers/testcontainers-go"
1213
"github.com/testcontainers/testcontainers-go/wait"
1314
"io"
@@ -181,8 +182,15 @@ func setupQuesma(ctx context.Context, quesmaConfig string) (testcontainers.Conta
181182
if err != nil {
182183
return nil, err
183184
}
185+
186+
quesmaVersion := os.Getenv("QUESMA_IT_VERSION")
187+
if quesmaVersion == "" {
188+
log.Println("No QUESMA_IT_VERSION environment variable set, watch out for stale images!")
189+
quesmaVersion = "nightly"
190+
}
191+
184192
quesmaReq := testcontainers.ContainerRequest{
185-
Image: "quesma/quesma:nightly",
193+
Image: fmt.Sprintf("quesma/quesma:%s", quesmaVersion),
186194
ExposedPorts: []string{"0.0.0.0::9999/tcp", "0.0.0.0::8080/tcp"},
187195
Env: map[string]string{
188196
"QUESMA_CONFIG_FILE": "/configuration/conf.yaml",
@@ -245,7 +253,10 @@ func setupClickHouse(ctx context.Context) (testcontainers.Container, error) {
245253
HostConfigModifier: func(hc *container.HostConfig) {
246254
hc.ExtraHosts = []string{"localhost-for-github-ci:host-gateway"}
247255
},
248-
WaitingFor: wait.ForExposedPort().WithStartupTimeout(2 * time.Minute),
256+
WaitingFor: wait.ForSQL("9000", "clickhouse",
257+
func(host string, port nat.Port) string {
258+
return fmt.Sprintf("clickhouse://%s:%d", host, port.Int())
259+
}).WithStartupTimeout(2 * time.Minute),
249260
}
250261
return testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
251262
ContainerRequest: req,

platform/ingest/parser.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,25 @@ func columnsToString(columnsFromJson []CreateTableEntry,
5555
columnMetadata := comment_metadata.NewCommentMetadata()
5656
columnMetadata.Values[comment_metadata.ElasticFieldName] = propertyName
5757
comment := columnMetadata.Marshall()
58-
5958
if columnFromSchema, found := columnsFromSchema[schema.FieldName(columnFromJson.ClickHouseColumnName)]; found {
60-
// Schema takes precedence over inferred type from JSON
59+
// Check if the type is an Array – if so, fall back to the JSON type
6160
if strings.Contains(columnFromJson.ClickHouseType, "Array") {
6261
// The schema (e.g. PUT /:index/_mapping) doesn't contain information about whether a field is an array or not.
6362
// Therefore, we have to combine the information from the schema and the JSON in such case.
6463
// For example: in the mapping we have a field "products.name" with type "keyword" (String)
6564
// and in the JSON "products.name" is an array of strings (Array(String)).
66-
6765
if strings.Count(columnFromJson.ClickHouseType, "Array") > 1 {
6866
logger.Warn().Msgf("Column '%s' has type '%s' - an array nested multiple times. Such case might not be handled correctly.", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType)
6967
}
70-
71-
result.WriteString(fmt.Sprintf("\"%s\" Array(%s) COMMENT '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType, comment))
68+
result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType+" COMMENT ", comment))
69+
// TODO this should be changed to use the schema type, but needs further investigation
70+
//result.WriteString(fmt.Sprintf("\"%s\" Array(%s) COMMENT '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType, comment))
7271
} else {
72+
// Use schema type
7373
result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType+" COMMENT ", comment))
7474
}
7575
} else {
76+
// Not found in schema – fall back to the JSON type
7677
result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType+" COMMENT ", comment))
7778
}
7879

0 commit comments

Comments
 (0)