Skip to content

bug: extractNormalizedInsertQueryAndColumns does not remove backticks inside a dot-delimited column name #1401

Open
@fidiego

Description

@fidiego

Observed

extractNormalizedInsertQueryAndColumns cleans up leading and trailing backticks in columns such as "events.attributes" but leaves the internal backticks untouched leading to a block cannot be sorted error due to the mismatch.

This affects Nested type columns used via go-gorm.

Expected behaviour

Ideally, extractNormalizedInsertQueryAndColumns should handle this case.

Working `extractNormalizedInsertQueryColumns`

func extractNormalizedInsertQueryAndColumns(query string) (normalizedQuery string, tableName string, columns []string, err error) {
	query = truncateFormat.ReplaceAllString(query, "")
	query = truncateValues.ReplaceAllString(query, "")

	matches := normalizeInsertQueryMatch.FindStringSubmatch(query)
	if len(matches) == 0 {
		err = errors.Errorf("invalid INSERT query: %s", query)
		return
	}

	normalizedQuery = fmt.Sprintf("%s FORMAT Native", matches[1])
	tableName = strings.TrimSpace(matches[2])

	columns = make([]string, 0)
	matches = extractInsertColumnsMatch.FindStringSubmatch(matches[1])
	if len(matches) == 2 {
		columns = strings.Split(matches[1], ",")
		for i := range columns {
			// refers to https://clickhouse.com/docs/en/sql-reference/syntax#identifiers
			// we can use identifiers with double quotes or backticks, for example: "id", `id`, but not both, like `"id"`.
			col := columns[i]
			col = strings.TrimSpace(columns[i]) // trim lead/trail -in whitespace
			col = strings.Trim(col, "\"")       // trim lead/trail -ing double quote
			col = strings.Trim(col, "`")        // trim lead/trial -ing tilde
			// for columns like events.attributes that are otherwise represented like this: events`.`attributes
			col = strings.ReplaceAll(col, "`.`", ".") // replace internal tilde delimited periods w/ plain period
			columns[i] = col
		}
	}

	return
}

`show create traces`

SHOW CREATE TABLE ctl_api.otel_traces

Query id: 69b8f8cf-71c2-4967-8cba-d80945a4be78

   ┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
1. │ CREATE TABLE ctl_api.otel_traces
(
    `id` String,
    `created_by_id` String,
    `created_at` DateTime64(3),
    `updated_at` DateTime64(3),
    `deleted_at` UInt64,
    `runner_id` String,
    `runner_job_id` String,
    `runner_job_execution_id` String,
    `timestamp` DateTime64(9) CODEC(Delta(8), ZSTD(1)),
    `timestamp_date` Date DEFAULT toDate(timestamp),
    `timestamp_time` DateTime DEFAULT toDateTime(timestamp),
    `resource_attributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
    `resource_schema_url` LowCardinality(String) CODEC(ZSTD(1)),
    `scope_name` String CODEC(ZSTD(1)),
    `scope_version` LowCardinality(String) CODEC(ZSTD(1)),
    `scope_attributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
    `scope_dropped_attr_count` Int64,
    `scope_schema_url` LowCardinality(String) CODEC(ZSTD(1)),
    `trace_id` String CODEC(ZSTD(1)),
    `span_id` String CODEC(ZSTD(1)),
    `parent_span_id` String CODEC(ZSTD(1)),
    `trace_state` String CODEC(ZSTD(1)),
    `span_name` LowCardinality(String) CODEC(ZSTD(1)),
    `span_kind` LowCardinality(String) CODEC(ZSTD(1)),
    `service_name` LowCardinality(String) CODEC(ZSTD(1)),
    `span_attributes` Map(LowCardinality(String), String) CODEC(ZSTD(1)),
    `duration` Int64 CODEC(ZSTD(1)),
    `status_code` LowCardinality(String) CODEC(ZSTD(1)),
    `status_message` String CODEC(ZSTD(1)),
    `events.timestamp` Array(DateTime64(9)),
    `events.name` Array(LowCardinality(String)),
    `events.attributes` Array(Map(LowCardinality(String), String)),
    `links.trace_id` Array(String),
    `links.span_id` Array(String),
    `links.span_state` Array(String),
    `links.attributes` Array(Map(LowCardinality(String), String)),
    INDEX idx_trace_id trace_id TYPE bloom_filter(0.001) GRANULARITY 1,
    INDEX idx_span_attr_key mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1,
    INDEX idx_span_attr_value mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1,
    INDEX idx_res_attr_key mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1,
    INDEX idx_res_attr_value mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1,
    INDEX idx_scope_attr_key mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1,
    INDEX idx_scope_attr_value mapKeys(resource_attributes) TYPE bloom_filter(0.1) GRANULARITY 1
)
ENGINE = MergeTree
PARTITION BY toDate(timestamp)
ORDER BY (service_name, span_name, toUnixTimestamp(timestamp), trace_id)
TTL toDateTime(timestamp) + toIntervalDay(180)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1 │
   └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

1 row in set. Elapsed: 0.004 sec.

Code example

package code

// given an auto-migrated struct like this

type Trace struct {
	Events         []OtelTraceEvent  `gorm:"type:Nested(timestamp DateTime64(9), name LowCardinality(String), attributes Map(LowCardinality(String), String));"` // order matters, dont't touch nested def
	Links          []OtelTraceLink   `gorm:"type:Nested(trace_id String, span_id String, span_state String, attributes Map(LowCardinality(String), String));"`   // order matters, dont't touch nested def
}

// and a non-migrated struct like this
type TraceIngestion struct {
	EventsTimestamp  []time.Time         `gorm:"type:DateTime64(9);column:events.timestamp"`
	EventsName       []string            `gorm:"type:LowCardinality(String);column:events.name"`
	EventsAttributes []map[string]string `gorm:"type:Map(LowCardinality(String), String);column:events.attributes"`
	LinksTraceID     []string            `gorm:"type:LowCardinality(String);column:links.trace_id"`
	LinksSpanID      []string            `gorm:"type:LowCardinality(String);column:links.span_id"`
	LinksState       []string            `gorm:"type:LowCardinality(String);column:links.span_state"`
	LinksAttributes  []map[string]string `gorm:"type:Map(LowCardinality(String), String);column:links.attributes"`
}

Error log

block cannot be sorted - missing columns in requested order: [events.timestamp events.name events.attributes]"

Details

Environment

  • clickhouse-go version: 2.28.2
  • Interface: go-gorm/clickhouse
  • Go version: 1.22.0
  • Operating system: OS X
  • ClickHouse version: 24.9.1.628
  • Is it a ClickHouse Cloud? No
  • ClickHouse Server non-default settings, if any: -
  • CREATE TABLE statements for tables involved:
  • Sample data for all these tables, use clickhouse-obfuscator if necessary

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions