Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion bin/it.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,10 @@ bin/build-image.sh

cd ci/it

go test -v
if [ -n "$1" ]; then
# Run only tests matching the pattern
go test -v -run "$1"
else
# Run all tests
go test -v
fi
18 changes: 10 additions & 8 deletions cmd/v2_test_objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,17 +388,19 @@ func (p *QueryTransformationPipeline) ParseQuery(message any) (*model.ExecutionP
// TODO this is a hack to create a table for the query
// Why parser needs a table?
tableName := "test_table"
table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+`
( "message" String, "@timestamp" DateTime64(3, 'UTC'), "attributes_values" Map(String,String))
ENGINE = Memory`,
clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
)
if err != nil {
return nil, err
table := clickhouse.Table{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still like the idea of having this behind a constructor, especially given that this repeats in a few places.

Of course, I'd make clickhouse.NewTable() take a table name and a list of columns, not the CREATE TABLE ... string 😉

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally agree, but most likely in the next PR.

Name: tableName,
DatabaseName: "default",
Cols: map[string]*clickhouse.Column{
"message": {Name: "message", Type: clickhouse.NewBaseType("String")},
"@timestamp": {Name: "@timestamp", Type: clickhouse.NewBaseType("DateTime64")},
"attributes_values": {Name: "attributes_values", Type: clickhouse.NewBaseType("Map(String,String)")},
},
Config: clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
}
cw := elastic_query_dsl.ClickhouseQueryTranslator{
Ctx: req.OriginalRequest.Context(),
Table: table,
Table: &table,
}
plan, err := cw.ParseQuery(query)
if err != nil {
Expand Down
16 changes: 1 addition & 15 deletions platform/clickhouse/clickhouse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,17 +519,11 @@ func TestCreateTableString_1(t *testing.T) {
PrimaryKey: "",
Ttl: "",
Attributes: []Attribute{
NewDefaultInt64Attribute(),
NewDefaultStringAttribute(),
NewDefaultBoolAttribute(),
},
CastUnsupportedAttrValueTypesToString: false,
PreferCastingToOthers: false,
},
Indexes: []IndexStatement{
GetIndexStatement("body"),
GetIndexStatement("severity"),
},
}
expectedRows := []string{
`CREATE TABLE IF NOT EXISTS "/_bulk?refresh=false&_source_includes=originId&require_alias=true_16" (`,
Expand All @@ -545,16 +539,8 @@ func TestCreateTableString_1(t *testing.T) {
`"updated_at" DateTime64`,
`),`,
`"@timestamp" DateTime64,`,
`"attributes_int64_key" Array(String),`,
`"attributes_int64_value" Array(Int64),`,
`"attributes_string_key" Array(String),`,
`"attributes_string_value" Array(String),`,
`"attributes_bool_key" Array(String),`,
`"attributes_bool_value" Array(Bool),`,
`"attributes_values" Map(String,String),`,
`"attributes_metadata" Map(String,String),`,
`INDEX body_idx body TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,`,
`INDEX severity_idx severity TYPE set(25) GRANULARITY 4`,
`"attributes_metadata" Map(String,String)`,
`)`,
`ENGINE = MergeTree`,
`ORDER BY (@timestamp)`,
Expand Down
244 changes: 0 additions & 244 deletions platform/clickhouse/parserCreateTable.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ package clickhouse

import (
"github.com/QuesmaOrg/quesma/platform/logger"
"strings"
"unicode"
)

Expand Down Expand Up @@ -43,16 +42,6 @@ func parseMaybeAndForget(q string, i int, s string) (int, bool) {
return i, false
}

// parseMaybeAndForgetMultiple tries every candidate in ss, in order, against q
// at position i by delegating to parseMaybeAndForget. It returns the result of
// the first candidate that matches; when none match it returns i unchanged
// together with false.
func parseMaybeAndForgetMultiple(q string, i int, ss []string) (int, bool) {
	for idx := range ss {
		if next, matched := parseMaybeAndForget(q, i, ss[idx]); matched {
			return next, true
		}
	}
	return i, false
}

// isGoodIdentChar reports whether r is allowed inside an identifier: any rune
// except whitespace, parentheses, a double quote, a backtick or a comma.
func isGoodIdentChar(r rune) bool {
	switch r {
	case ')', '"', '`', ',', '(':
		return false
	}
	return !unicode.IsSpace(r)
}
Expand Down Expand Up @@ -127,82 +116,6 @@ func parseIdentWithBrackets(q string, i int) (int, string) {
return -1, ""
}

// parseColumn parses one column definition of a CREATE TABLE statement,
// starting at q[i].
//
// The accepted shape, in this order, is: a name quoted with `"` or a backtick,
// a type (via parseNullable), an optional NULL / NOT NULL, an optional
// DEFAULT | MATERIALIZED | EPHEMERAL | ALIAS expression, an optional CODEC,
// an optional TTL expression and an optional COMMENT.
//
// On success it returns the index of the ',' or ')' that terminates the column
// and the parsed Column. On failure it returns -1 and the partially filled
// Column.
func parseColumn(q string, i int) (int, Column) {
	col := Column{}
	i = omitWhitespace(q, i)
	// name
	quote := `"`
	i2 := parseExact(q, i, quote)
	if i2 == -1 {
		quote = "`"
		i2 = parseExact(q, i, quote)
		if i2 == -1 {
			return -1, col
		}
	}
	i, col.Name = parseIdent(q, i2)
	if i == -1 {
		return -1, col
	}
	i = parseExact(q, i, quote)
	// type
	if i == -1 {
		return -1, col
	}
	i, col.Type = parseNullable(q, i)
	if i == -1 {
		return -1, col
	}

	// NULL | NOT NULL
	i = omitWhitespace(q, i)
	i, _ = parseMaybeAndForgetMultiple(q, i, []string{"NULL", "NOT NULL"})

	// DEFAULT | MATERIALIZED | EPHEMERAL | ALIAS expr
	i = omitWhitespace(q, i)
	i, ok := parseMaybeAndForgetMultiple(q, i, []string{"DEFAULT", "MATERIALIZED", "EPHEMERAL", "ALIAS"})
	if ok {
		i = omitWhitespace(q, i)
		i = parseExpr(q, i)
		if i == -1 {
			return -1, col
		}
		i = omitWhitespace(q, i)
	}

	// CODEC
	if columnKeywordAt(q, i, "CODEC") {
		i, col.Codec = parseCodec(q, i)
		// A malformed codec must fail the column here; carrying -1 forward
		// would be used as a slice index by the checks below.
		if i == -1 {
			return -1, col
		}
		i = omitWhitespace(q, i)
	}

	// TTL
	if columnKeywordAt(q, i, "TTL") {
		i = omitWhitespace(q, i+3)
		i = parseExpr(q, i)
		if i == -1 {
			return -1, col
		}
		i = omitWhitespace(q, i)
	}

	// COMMENT
	if columnKeywordAt(q, i, "COMMENT") {
		// TODO should be good enough for now: skip ahead to the next comma.
		// The scan is bounded by len(q); running off the end is reported as a
		// parse failure by the final check instead of panicking.
		for i++; i < len(q) && q[i] != ','; i++ {
		}
	}

	if i < 0 || i >= len(q) || (q[i] != ',' && q[i] != ')') {
		return -1, col
	}
	return i, col
}

// columnKeywordAt reports whether kw occurs in q at index i with at least one
// more character after it. A negative i (a failure marker from an earlier
// step) never matches, so it cannot be used as a slice bound.
func columnKeywordAt(q string, i int, kw string) bool {
	return i >= 0 && i+len(kw) < len(q) && q[i:i+len(kw)] == kw
}

func parseType(q string, i int) (int, Type) {
i2, name := parseIdent(q, i)
if i == -1 {
Expand Down Expand Up @@ -288,160 +201,3 @@ func parseMultiValueType(q string, i int) (int, []*Column) {
i = omitWhitespace(q, j+1)
}
}

// parseCodec parses a "CODEC ( ... )" clause starting at q[i].
//
// It expects the literal CODEC, optional whitespace and an opening bracket,
// then skips to the matching closing bracket (nested brackets are counted).
// On success it returns the index just past the closing bracket and a Codec
// whose Name is the whole clause text, from "CODEC" through the closing
// bracket. It returns -1 and an empty Codec when the keyword or the opening
// bracket is missing, or when the scan reaches the end of q.
func parseCodec(q string, i int) (int, Codec) {
	start := i
	i = parseExact(q, i, "CODEC")
	if i == -1 {
		return -1, Codec{}
	}
	i = omitWhitespace(q, i)
	i = parseExact(q, i, "(")
	if i == -1 {
		// Without this check the loop below would index q[-1] and panic.
		return -1, Codec{}
	}
	bracketsCnt := 1
	for i < len(q) && bracketsCnt > 0 {
		switch q[i] {
		case '(':
			bracketsCnt++
		case ')':
			bracketsCnt--
		}
		i++
	}
	if i >= len(q) {
		return -1, Codec{}
	}
	return i, Codec{Name: q[start:i]}
}

// parseExpr skips over an expression starting at q[i] and returns the index at
// which the expression ends, or -1 if the end of q is reached first.
//
// The expression is not really parsed: brackets are counted and the scan stops
// at the first position that
//   - closes a bracket that was not opened inside the expression,
//   - is a top-level ',',
//   - starts one of the keywords CODEC, TTL or COMMENT at top level, or
//   - is a top-level ')' followed (after whitespace) by ENGINE.
//
// This is a heuristic; it relies on CODEC/TTL/COMMENT not appearing inside
// expressions.
func parseExpr(q string, i int) int {
	depth := 0
	for i < len(q) {
		switch q[i] {
		case '(':
			depth++
		case ')':
			depth--
		}
		if depth < 0 {
			return i
		}
		if depth == 0 {
			if q[i] == ',' {
				return i
			}
			if _, isKeyword := parseMaybeAndForgetMultiple(q, i, []string{"CODEC", "TTL", "COMMENT"}); isKeyword {
				return i
			}
			if q[i] == ')' {
				afterBracket := omitWhitespace(q, i+1)
				if parseExact(q, afterBracket, "ENGINE") != -1 {
					return i
				}
			}
		}
		i = omitWhitespace(q, i+1)
	}
	return -1
}

// ParseCreateTable parses a "CREATE TABLE [IF NOT EXISTS] [db.]name
// [ON CLUSTER cluster] ( columns..." statement into a Table, filling in Name,
// DatabaseName, ClusterName and Cols.
//
// The second return value is the status:
// 0 = success,
// > 0 - fail, char index where failed
// The *Table is returned in both cases; on failure it holds whatever was
// parsed before the error.
//
// Parsing stops (successfully) at the closing ')' of the column list or at the
// first INDEX clause; nothing after that point is inspected here.
//
// Tuples can be unnamed. In this case they are not supported yet, as I'm not sure
// if it's worth adding right now.
func ParseCreateTable(q string) (*Table, int) {
t := Table{}

// parse header
i := parseExact(q, 0, "CREATE TABLE ")
if i == -1 {
return &t, 1
}
i, _ = parseMaybeAndForget(q, i, "IF NOT EXISTS ")

// parse [db.]table_name
i = omitWhitespace(q, i)
// The name may be wrapped in double quotes; remember that so the closing
// quote can be required after the identifier.
i2 := parseExact(q, i, `"`)
quote := i2 != -1
if quote {
i = i2
}
i2, ident := parseIdent(q, i) // ident = db name or table name
if i2 == -1 {
return &t, i
}
if strings.Contains(ident, ".") { // If it has ".", it means it is DB name
// Everything before the first "." is the database, the rest (which may
// itself contain dots) is the table name.
split := strings.Split(ident, ".")
if len(split) > 1 {
t.Name = strings.Join(split[1:], ".")
}
t.DatabaseName = split[0]
} else {
t.Name = ident
}
if quote {
i2 = parseExact(q, i2, `"`)
if i2 == -1 {
return &t, i
}
}

// parse [ON CLUSTER cluster_name]
i3 := parseExact(q, i2, "ON CLUSTER ")
if i3 != -1 {
i3 = omitWhitespace(q, i3)
i4, _ := parseMaybeAndForget(q, i3, `"`) // cluster name can be quoted, but doesn't have to
if i4 != -1 {
i3 = i4
}
// NOTE: this `ident` shadows the table-name `ident` declared above.
i4, ident := parseIdent(q, i3)
if i4 == -1 {
return &t, i3
}
t.ClusterName = ident
if i4 != -1 {
i4, _ = parseMaybeAndForget(q, i4, `"`)
if i4 == -1 {
return &t, i3
}
}
i2 = i4
}

// Opening bracket of the column list.
i3 = parseExact(q, i2, "(")
if i3 == -1 {
return &t, i2
}

// parse columns
t.Cols = make(map[string]*Column)
for {
// The whitespace-skipped position is used only for the INDEX probe below;
// parseColumn is still started from i3.
i = omitWhitespace(q, i3)
if parseExact(q, i, "INDEX") != -1 {
return &t, 0
}
// NOTE: `i` and `col` are new variables scoped to this loop iteration;
// this `i` shadows the function-level `i`.
i, col := parseColumn(q, i3)
if i == -1 {
return &t, i3
}
t.Cols[col.Name] = &col
i2 = omitWhitespace(q, i)
if i2 == -1 {
return &t, i
}
if q[i2] == ')' {
// End of the column list.
return &t, 0
} else if q[i2] != ',' {
return &t, i2
} else {
// After a ',': a ')' right behind it (trailing comma) also ends the
// list; otherwise continue with the next column just past the comma.
i3 = omitWhitespace(q, i2+1)
if i3 == -1 {
return &t, i2 + 1
} else if q[i3] == ')' {
return &t, 0
} else {
i3 = i2 + 1
}
}
}
}
10 changes: 0 additions & 10 deletions platform/clickhouse/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,16 +311,6 @@ func NewType(value any, valueOrigin string) (Type, error) {
return nil, fmt.Errorf("unsupported type '%T' of value: %v (origin: %s)", value, value, valueOrigin)
}

// NewTable builds a Table from a CREATE TABLE statement using ParseCreateTable
// and attaches config to it. When parsing fails, the partially parsed table is
// still returned, together with an error that carries the failing character
// index and the original query.
func NewTable(createTableQuery string, config *ChTableConfig) (*Table, error) {
	table, failedAt := ParseCreateTable(createTableQuery)
	table.Config = config
	if failedAt != 0 {
		return table, fmt.Errorf("error parsing query at character %d, query: %s", failedAt, createTableQuery)
	}
	return table, nil
}

// NewEmptyTable is used only in tests
func NewEmptyTable(tableName string) *Table {
return &Table{Name: tableName, Config: NewChTableConfigNoAttrs()}
Expand Down
16 changes: 10 additions & 6 deletions platform/clickhouse/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ import (

func TestGetDateTimeType(t *testing.T) {
ctx := context.Background()
table, err := NewTable(`CREATE TABLE table (
"timestamp1" DateTime,
"timestamp2" DateTime('UTC'),
"timestamp64_1" DateTime64,
"timestamp64_2" DateTime64(3, 'UTC') ) ENGINE = Memory`, NewChTableConfigTimestampStringAttr())
assert.NoError(t, err)
table := Table{
Name: "table",
Cols: map[string]*Column{
"timestamp1": {Name: "timestamp1", Type: NewBaseType("DateTime")},
"timestamp2": {Name: "timestamp2", Type: NewBaseType("DateTime('UTC')")},
"timestamp64_1": {Name: "timestamp64_1", Type: NewBaseType("DateTime64")},
"timestamp64_2": {Name: "timestamp64_2", Type: NewBaseType("DateTime64(3, 'UTC')")},
},
Config: NewChTableConfigTimestampStringAttr(),
}
assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "timestamp1", true))
assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "timestamp2", true))
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_1", true))
Expand Down
Loading