From 554b1b89c13bf1e97f80314806c93054ba7a5c1c Mon Sep 17 00:00:00 2001 From: Przemek Delewski Date: Wed, 14 May 2025 16:37:50 +0200 Subject: [PATCH 1/7] Update array types --- ci/it/testcases/test_ingest.go | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/ci/it/testcases/test_ingest.go b/ci/it/testcases/test_ingest.go index 4decc2afd..2580b3e0c 100644 --- a/ci/it/testcases/test_ingest.go +++ b/ci/it/testcases/test_ingest.go @@ -182,7 +182,7 @@ var ( "@timestamp": "DateTime64(3)", "attributes_metadata": "Map(String, String)", "attributes_values": "Map(String, String)", - "category": "Array(String)", + "category": "Array(Nullable(String))", "currency": "Nullable(String)", "customer_first_name": "Nullable(String)", "customer_full_name": "Nullable(String)", @@ -200,28 +200,28 @@ var ( "geoip_location_lat": "Nullable(Float64)", "geoip_location_lon": "Nullable(Float64)", "geoip_region_name": "Nullable(String)", - "manufacturer": "Array(String)", + "manufacturer": "Array(Nullable(String))", "order_date": "DateTime64(3)", "order_id": "Nullable(Int64)", - "products__id": "Array(String)", - "products_base_price": "Array(Float64)", - "products_base_unit_price": "Array(Float64)", - "products_category": "Array(String)", - "products_created_on": "Array(DateTime64(3))", - "products_discount_amount": "Array(Int64)", - "products_discount_percentage": "Array(Int64)", - "products_manufacturer": "Array(String)", - "products_min_price": "Array(Float64)", - "products_price": "Array(Float64)", - "products_product_id": "Array(Int64)", - "products_product_name": "Array(String)", - "products_quantity": "Array(Int64)", - "products_sku": "Array(String)", - "products_tax_amount": "Array(Int64)", - "products_taxful_price": "Array(Float64)", - "products_taxless_price": "Array(Float64)", - "products_unit_discount_amount": "Array(Int64)", - "sku": "Array(String)", + "products__id": "Array(Nullable(String))", + "products_base_price": "Array(Nullable(Float64))", + "products_base_unit_price": "Array(Nullable(Float64))", + "products_category": "Array(Nullable(String))", + "products_created_on": "Array(Nullable(DateTime64(3)))", + "products_discount_amount": "Array(Nullable(Int64))", + "products_discount_percentage": "Array(Nullable(Int64))", + "products_manufacturer": "Array(Nullable(String))", + "products_min_price": "Array(Nullable(Float64))", + "products_price": "Array(Nullable(Float64))", + "products_product_id": "Array(Nullable(Int64))", + "products_product_name": "Array(Nullable(String))", + "products_quantity": "Array(Nullable(Int64))", + "products_sku": "Array(Nullable(String))", + "products_tax_amount": "Array(Nullable(Int64))", + "products_taxful_price": "Array(Nullable(Float64))", + "products_taxless_price": "Array(Nullable(Float64))", + "products_unit_discount_amount": "Array(Nullable(Int64))", + "sku": "Array(Nullable(String))", "taxful_total_price": "Nullable(Float64)", "taxless_total_price": "Nullable(Float64)", "total_quantity": "Nullable(Int64)", From ef29f12117248684fe82fff3cf9b753c5e3c3a4c Mon Sep 17 00:00:00 2001 From: Przemek Delewski Date: Thu, 15 May 2025 14:59:40 +0200 Subject: [PATCH 2/7] Fallback --- ci/it/testcases/test_ingest.go | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/ci/it/testcases/test_ingest.go b/ci/it/testcases/test_ingest.go index 2580b3e0c..4decc2afd 100644 --- a/ci/it/testcases/test_ingest.go +++ b/ci/it/testcases/test_ingest.go @@ -182,7 +182,7 @@ var ( "@timestamp": "DateTime64(3)", "attributes_metadata": "Map(String, String)", "attributes_values": "Map(String, String)", - "category": "Array(Nullable(String))", + "category": "Array(String)", "currency": "Nullable(String)", "customer_first_name": "Nullable(String)", "customer_full_name": "Nullable(String)", @@ -200,28 +200,28 @@ var ( "geoip_location_lat": "Nullable(Float64)", "geoip_location_lon": "Nullable(Float64)", "geoip_region_name": "Nullable(String)", - "manufacturer": "Array(Nullable(String))", + "manufacturer": "Array(String)", "order_date": "DateTime64(3)", "order_id": "Nullable(Int64)", - "products__id": "Array(Nullable(String))", - "products_base_price": "Array(Nullable(Float64))", - "products_base_unit_price": "Array(Nullable(Float64))", - "products_category": "Array(Nullable(String))", - "products_created_on": "Array(Nullable(DateTime64(3)))", - "products_discount_amount": "Array(Nullable(Int64))", - "products_discount_percentage": "Array(Nullable(Int64))", - "products_manufacturer": "Array(Nullable(String))", - "products_min_price": "Array(Nullable(Float64))", - "products_price": "Array(Nullable(Float64))", - "products_product_id": "Array(Nullable(Int64))", - "products_product_name": "Array(Nullable(String))", - "products_quantity": "Array(Nullable(Int64))", - "products_sku": "Array(Nullable(String))", - "products_tax_amount": "Array(Nullable(Int64))", - "products_taxful_price": "Array(Nullable(Float64))", - "products_taxless_price": "Array(Nullable(Float64))", - "products_unit_discount_amount": "Array(Nullable(Int64))", - "sku": "Array(Nullable(String))", + "products__id": "Array(String)", + "products_base_price": "Array(Float64)", + "products_base_unit_price": "Array(Float64)", + "products_category": "Array(String)", + "products_created_on": "Array(DateTime64(3))", + "products_discount_amount": "Array(Int64)", + "products_discount_percentage": "Array(Int64)", + "products_manufacturer": "Array(String)", + "products_min_price": "Array(Float64)", + "products_price": "Array(Float64)", + "products_product_id": "Array(Int64)", + "products_product_name": "Array(String)", + "products_quantity": "Array(Int64)", + "products_sku": "Array(String)", + "products_tax_amount": "Array(Int64)", + "products_taxful_price": "Array(Float64)", + "products_taxless_price": "Array(Float64)", + "products_unit_discount_amount": "Array(Int64)", + "sku": "Array(String)", "taxful_total_price": "Nullable(Float64)", "taxless_total_price": "Nullable(Float64)", "total_quantity": "Nullable(Int64)", From 06de698559743a762d8f7c7bb818de18d3036308 Mon Sep 17 00:00:00 2001 From: Przemek Delewski Date: Thu, 15 May 2025 14:59:51 +0200 Subject: [PATCH 3/7] Fallback --- platform/ingest/parser.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/platform/ingest/parser.go b/platform/ingest/parser.go index a1024c652..3dedbbb10 100644 --- a/platform/ingest/parser.go +++ b/platform/ingest/parser.go @@ -55,24 +55,25 @@ func columnsToString(columnsFromJson []CreateTableEntry, columnMetadata := comment_metadata.NewCommentMetadata() columnMetadata.Values[comment_metadata.ElasticFieldName] = propertyName comment := columnMetadata.Marshall() - if columnFromSchema, found := columnsFromSchema[schema.FieldName(columnFromJson.ClickHouseColumnName)]; found { - // Schema takes precedence over inferred type from JSON + // Check if the type is an Array – if so, fallback to JSON type if strings.Contains(columnFromJson.ClickHouseType, "Array") { // The schema (e.g. PUT /:index/_mapping) doesn't contain information about whether a field is an array or not. // Therefore, we have to combine the information from the schema and the JSON in such case. // For example: in the mapping we have a field "products.name" with type "keyword" (String) // and in the JSON "products.name" is an array of strings (Array(String)). - if strings.Count(columnFromJson.ClickHouseType, "Array") > 1 { logger.Warn().Msgf("Column '%s' has type '%s' - an array nested multiple times. Such case might not be handled correctly.", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType) } - - result.WriteString(fmt.Sprintf("\"%s\" Array(%s) COMMENT '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType, comment)) + result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType+" COMMENT ", comment)) + // TODO this should be changed to use the schema type, but needs further investigation + //result.WriteString(fmt.Sprintf("\"%s\" Array(%s) COMMENT '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType, comment)) } else { + // Use schema type result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromSchema.ClickHouseColumnName, columnFromSchema.ClickHouseType+" COMMENT ", comment)) } } else { + // Not found in schema – fallback to JSON type result.WriteString(fmt.Sprintf("\"%s\" %s '%s'", columnFromJson.ClickHouseColumnName, columnFromJson.ClickHouseType+" COMMENT ", comment)) } From 4c6ea49409a7f318192f203e35fc6f3e35403a76 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Mon, 19 May 2025 11:00:37 +0200 Subject: [PATCH 4/7] dumb injection of QUESMA VERSION --- .github/workflows/integration-tests.yml | 4 +++- ci/it/testcases/utils.go | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 4188a6bdb..4b5af73db 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -75,7 +75,9 @@ jobs: docker image ls -a - name: Set environment variable - run: echo "EXECUTING_ON_GITHUB_CI=true" >> $GITHUB_ENV + run: | + echo "EXECUTING_ON_GITHUB_CI=true" >> $GITHUB_ENV + echo "QUESMA_IT_VERSION=${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }}" >> $GITHUB_ENV - name: Get last commit author id: get_author diff --git a/ci/it/testcases/utils.go b/ci/it/testcases/utils.go index 1d6772b91..3873cf889 100644 --- a/ci/it/testcases/utils.go +++ b/ci/it/testcases/utils.go @@ -6,6 +6,7 @@ package testcases import ( "bytes" "context" + "errors" "fmt" "github.com/docker/docker/api/types/container" "github.com/testcontainers/testcontainers-go" @@ -181,8 +182,14 @@ func setupQuesma(ctx context.Context, quesmaConfig string) (testcontainers.Conta if err != nil { return nil, err } + + quesmaVersion := os.Getenv("QUESMA_IT_VERSION") + if quesmaVersion == "" { + return nil, errors.New("missing environment variable QUESMA_IT_VERSION") + } // TODO remove + quesmaReq := testcontainers.ContainerRequest{ - Image: "quesma/quesma:nightly", + Image: fmt.Sprintf("quesma/quesma:%s", quesmaVersion), ExposedPorts: []string{"0.0.0.0::9999/tcp", "0.0.0.0::8080/tcp"}, Env: map[string]string{ "QUESMA_CONFIG_FILE": "/configuration/conf.yaml", From eb87b611f038440e9dc507bea4c499fea824175f Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Mon, 19 May 2025 11:05:41 +0200 Subject: [PATCH 5/7] QUESMA_IT_VERSION fallback --- ci/it/testcases/utils.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/it/testcases/utils.go b/ci/it/testcases/utils.go index 3873cf889..e97b56430 100644 --- a/ci/it/testcases/utils.go +++ b/ci/it/testcases/utils.go @@ -6,7 +6,6 @@ package testcases import ( "bytes" "context" - "errors" "fmt" "github.com/docker/docker/api/types/container" "github.com/testcontainers/testcontainers-go" @@ -185,8 +184,9 @@ func setupQuesma(ctx context.Context, quesmaConfig string) (testcontainers.Conta quesmaVersion := os.Getenv("QUESMA_IT_VERSION") if quesmaVersion == "" { - return nil, errors.New("missing environment variable QUESMA_IT_VERSION") - } // TODO remove + log.Println("No QUESMA_IT_VERSION environment variable set, watch out for stale images!") + quesmaVersion = "nightly" + } quesmaReq := testcontainers.ContainerRequest{ Image: fmt.Sprintf("quesma/quesma:%s", quesmaVersion), From 27ecd604549b85624012292c9323bd31c4feb907 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Mon, 19 May 2025 11:07:34 +0200 Subject: [PATCH 6/7] fix missing docker image tag --- .github/workflows/integration-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 4b5af73db..5802d5809 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -46,6 +46,7 @@ jobs: uses: ./.github/workflows/build-quesma-docker-image.yml with: REF: ${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }} + VERSION: ${{ github.event.inputs.GIT_REF || needs.check-comment.outputs.ref }} integration-test-run: runs-on: ubuntu-latest From 5d837a8502e0e8c2388f3aaf5e5038adcbc7ea83 Mon Sep 17 00:00:00 2001 From: przemyslaw Date: Mon, 19 May 2025 11:42:13 +0200 Subject: [PATCH 7/7] fix potential flakiness candidate --- ci/it/testcases/utils.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/it/testcases/utils.go b/ci/it/testcases/utils.go index e97b56430..9f4eeea72 100644 --- a/ci/it/testcases/utils.go +++ b/ci/it/testcases/utils.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "github.com/docker/docker/api/types/container" + "github.com/docker/go-connections/nat" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" "io" @@ -252,7 +253,10 @@ func setupClickHouse(ctx context.Context) (testcontainers.Container, error) { HostConfigModifier: func(hc *container.HostConfig) { hc.ExtraHosts = []string{"localhost-for-github-ci:host-gateway"} }, - WaitingFor: wait.ForExposedPort().WithStartupTimeout(2 * time.Minute), + WaitingFor: wait.ForSQL("9000", "clickhouse", + func(host string, port nat.Port) string { + return fmt.Sprintf("clickhouse://%s:%d", host, port.Int()) + }).WithStartupTimeout(2 * time.Minute), } return testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ ContainerRequest: req,