Skip to content
This repository was archived by the owner on Nov 7, 2025. It is now read-only.

Commit 57e07b7

Browse files
authored
Remove most of parse create table (#1443)
This is stage 2 of removing the CREATE TABLE parser. The parser is not removed completely, and there is still no logic that relies solely on the schema, but this change updates all tests and vastly reduces the scope of the CREATE TABLE parser. As a side effect, it fixes a bug affecting tables with dots in their names.
1 parent ba0114b commit 57e07b7

17 files changed

+151
-959
lines changed

bin/it.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,10 @@ bin/build-image.sh
77

88
cd ci/it
99

10-
go test -v
10+
if [ -n "$1" ]; then
11+
# Run only tests matching the pattern
12+
go test -v -run "$1"
13+
else
14+
# Run all tests
15+
go test -v
16+
fi

cmd/v2_test_objects.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -388,17 +388,19 @@ func (p *QueryTransformationPipeline) ParseQuery(message any) (*model.ExecutionP
388388
// TODO this is a hack to create a table for the query
389389
// Why parser needs a table?
390390
tableName := "test_table"
391-
table, err := clickhouse.NewTable(`CREATE TABLE `+tableName+`
392-
( "message" String, "@timestamp" DateTime64(3, 'UTC'), "attributes_values" Map(String,String))
393-
ENGINE = Memory`,
394-
clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
395-
)
396-
if err != nil {
397-
return nil, err
391+
table := clickhouse.Table{
392+
Name: tableName,
393+
DatabaseName: "default",
394+
Cols: map[string]*clickhouse.Column{
395+
"message": {Name: "message", Type: clickhouse.NewBaseType("String")},
396+
"@timestamp": {Name: "@timestamp", Type: clickhouse.NewBaseType("DateTime64")},
397+
"attributes_values": {Name: "attributes_values", Type: clickhouse.NewBaseType("Map(String,String)")},
398+
},
399+
Config: clickhouse.NewNoTimestampOnlyStringAttrCHConfig(),
398400
}
399401
cw := elastic_query_dsl.ClickhouseQueryTranslator{
400402
Ctx: req.OriginalRequest.Context(),
401-
Table: table,
403+
Table: &table,
402404
}
403405
plan, err := cw.ParseQuery(query)
404406
if err != nil {

platform/clickhouse/clickhouse_test.go

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -519,17 +519,11 @@ func TestCreateTableString_1(t *testing.T) {
519519
PrimaryKey: "",
520520
Ttl: "",
521521
Attributes: []Attribute{
522-
NewDefaultInt64Attribute(),
523522
NewDefaultStringAttribute(),
524-
NewDefaultBoolAttribute(),
525523
},
526524
CastUnsupportedAttrValueTypesToString: false,
527525
PreferCastingToOthers: false,
528526
},
529-
Indexes: []IndexStatement{
530-
GetIndexStatement("body"),
531-
GetIndexStatement("severity"),
532-
},
533527
}
534528
expectedRows := []string{
535529
`CREATE TABLE IF NOT EXISTS "/_bulk?refresh=false&_source_includes=originId&require_alias=true_16" (`,
@@ -545,16 +539,8 @@ func TestCreateTableString_1(t *testing.T) {
545539
`"updated_at" DateTime64`,
546540
`),`,
547541
`"@timestamp" DateTime64,`,
548-
`"attributes_int64_key" Array(String),`,
549-
`"attributes_int64_value" Array(Int64),`,
550-
`"attributes_string_key" Array(String),`,
551-
`"attributes_string_value" Array(String),`,
552-
`"attributes_bool_key" Array(String),`,
553-
`"attributes_bool_value" Array(Bool),`,
554542
`"attributes_values" Map(String,String),`,
555-
`"attributes_metadata" Map(String,String),`,
556-
`INDEX body_idx body TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4,`,
557-
`INDEX severity_idx severity TYPE set(25) GRANULARITY 4`,
543+
`"attributes_metadata" Map(String,String)`,
558544
`)`,
559545
`ENGINE = MergeTree`,
560546
`ORDER BY (@timestamp)`,

platform/clickhouse/parserCreateTable.go

Lines changed: 0 additions & 244 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package clickhouse
44

55
import (
66
"github.com/QuesmaOrg/quesma/platform/logger"
7-
"strings"
87
"unicode"
98
)
109

@@ -43,16 +42,6 @@ func parseMaybeAndForget(q string, i int, s string) (int, bool) {
4342
return i, false
4443
}
4544

46-
func parseMaybeAndForgetMultiple(q string, i int, ss []string) (int, bool) {
47-
for _, s := range ss {
48-
i2, ok := parseMaybeAndForget(q, i, s)
49-
if ok {
50-
return i2, true
51-
}
52-
}
53-
return i, false
54-
}
55-
5645
func isGoodIdentChar(r rune) bool {
5746
return !unicode.IsSpace(r) && r != ')' && r != '"' && r != '`' && r != ',' && r != '('
5847
}
@@ -127,82 +116,6 @@ func parseIdentWithBrackets(q string, i int) (int, string) {
127116
return -1, ""
128117
}
129118

130-
func parseColumn(q string, i int) (int, Column) {
131-
col := Column{}
132-
i = omitWhitespace(q, i)
133-
// name
134-
quote := `"`
135-
i2 := parseExact(q, i, quote)
136-
if i2 == -1 {
137-
quote = "`"
138-
i2 = parseExact(q, i, quote)
139-
if i2 == -1 {
140-
return -1, col
141-
}
142-
}
143-
i, col.Name = parseIdent(q, i2)
144-
if i == -1 {
145-
return -1, col
146-
}
147-
i = parseExact(q, i, quote)
148-
// type
149-
if i == -1 {
150-
return -1, col
151-
}
152-
i, col.Type = parseNullable(q, i)
153-
if i == -1 {
154-
return -1, col
155-
}
156-
157-
// NULL | NOT NULL
158-
i = omitWhitespace(q, i)
159-
i, _ = parseMaybeAndForgetMultiple(q, i, []string{"NULL", "NOT NULL"})
160-
161-
// DEFAULT | MATERIALIZED | EPHEMERAL | ALIAS expr
162-
i = omitWhitespace(q, i)
163-
i, ok := parseMaybeAndForgetMultiple(q, i, []string{"DEFAULT", "MATERIALIZED", "EPHEMERAL", "ALIAS"})
164-
if ok {
165-
i = omitWhitespace(q, i)
166-
i = parseExpr(q, i)
167-
if i == -1 {
168-
return -1, col
169-
}
170-
i = omitWhitespace(q, i)
171-
}
172-
173-
// CODEC
174-
if i+5 < len(q) && q[i:i+5] == "CODEC" {
175-
i, col.Codec = parseCodec(q, i)
176-
i = omitWhitespace(q, i)
177-
}
178-
179-
// TTL
180-
if i+3 < len(q) && q[i:i+3] == "TTL" {
181-
i = omitWhitespace(q, i+3)
182-
i = parseExpr(q, i)
183-
if i == -1 {
184-
return -1, col
185-
}
186-
i = omitWhitespace(q, i)
187-
}
188-
189-
// COMMENT
190-
if i+7 < len(q) && q[i:i+7] == "COMMENT" {
191-
// TODO should be good enough for now
192-
for {
193-
i++
194-
if q[i] == ',' {
195-
break
196-
}
197-
}
198-
}
199-
200-
if i == -1 || i >= len(q) || (q[i] != ',' && q[i] != ')') {
201-
return -1, col
202-
}
203-
return i, col
204-
}
205-
206119
func parseType(q string, i int) (int, Type) {
207120
i2, name := parseIdent(q, i)
208121
if i == -1 {
@@ -288,160 +201,3 @@ func parseMultiValueType(q string, i int) (int, []*Column) {
288201
i = omitWhitespace(q, j+1)
289202
}
290203
}
291-
292-
func parseCodec(q string, i int) (int, Codec) {
293-
b := i
294-
i = parseExact(q, i, "CODEC")
295-
if i == -1 {
296-
return -1, Codec{}
297-
}
298-
i = omitWhitespace(q, i)
299-
i = parseExact(q, i, "(")
300-
bracketsCnt := 1
301-
for i < len(q) && bracketsCnt > 0 {
302-
if q[i] == '(' {
303-
bracketsCnt++
304-
} else if q[i] == ')' {
305-
bracketsCnt--
306-
}
307-
i++
308-
}
309-
if i >= len(q) {
310-
return -1, Codec{}
311-
}
312-
return i, Codec{Name: q[b:i]}
313-
}
314-
315-
// Kind of hackish, but should work 100% of the time, unless CODEC/TTL/COMMENT
316-
// can be used in expressions (I'd assume they can't)
317-
func parseExpr(q string, i int) int {
318-
bracketsCnt := 0
319-
for i < len(q) {
320-
if q[i] == '(' {
321-
bracketsCnt++
322-
} else if q[i] == ')' {
323-
bracketsCnt--
324-
}
325-
if bracketsCnt < 0 {
326-
return i
327-
}
328-
if bracketsCnt == 0 {
329-
if q[i] == ',' {
330-
return i
331-
}
332-
_, ok := parseMaybeAndForgetMultiple(q, i, []string{"CODEC", "TTL", "COMMENT"})
333-
if ok {
334-
return i
335-
}
336-
if q[i] == ')' {
337-
i2 := omitWhitespace(q, i+1)
338-
if parseExact(q, i2, "ENGINE") != -1 {
339-
return i
340-
}
341-
}
342-
}
343-
i = omitWhitespace(q, i+1)
344-
}
345-
return -1
346-
}
347-
348-
// 0 = success,
349-
// > 0 - fail, char index where failed
350-
// Tuples can be unnamed. In this case they are not supported yet, as I'm not sure
351-
// if it's worth adding right now.
352-
func ParseCreateTable(q string) (*Table, int) {
353-
t := Table{}
354-
355-
// parse header
356-
i := parseExact(q, 0, "CREATE TABLE ")
357-
if i == -1 {
358-
return &t, 1
359-
}
360-
i, _ = parseMaybeAndForget(q, i, "IF NOT EXISTS ")
361-
362-
// parse [db.]table_name
363-
i = omitWhitespace(q, i)
364-
i2 := parseExact(q, i, `"`)
365-
quote := i2 != -1
366-
if quote {
367-
i = i2
368-
}
369-
i2, ident := parseIdent(q, i) // ident = db name or table name
370-
if i2 == -1 {
371-
return &t, i
372-
}
373-
if strings.Contains(ident, ".") { // If it has ".", it means it is DB name
374-
split := strings.Split(ident, ".")
375-
if len(split) > 1 {
376-
t.Name = strings.Join(split[1:], ".")
377-
}
378-
t.DatabaseName = split[0]
379-
} else {
380-
t.Name = ident
381-
}
382-
if quote {
383-
i2 = parseExact(q, i2, `"`)
384-
if i2 == -1 {
385-
return &t, i
386-
}
387-
}
388-
389-
// parse [ON CLUSTER cluster_name]
390-
i3 := parseExact(q, i2, "ON CLUSTER ")
391-
if i3 != -1 {
392-
i3 = omitWhitespace(q, i3)
393-
i4, _ := parseMaybeAndForget(q, i3, `"`) // cluster name can be quoted, but doesn't have to
394-
if i4 != -1 {
395-
i3 = i4
396-
}
397-
i4, ident := parseIdent(q, i3)
398-
if i4 == -1 {
399-
return &t, i3
400-
}
401-
t.ClusterName = ident
402-
if i4 != -1 {
403-
i4, _ = parseMaybeAndForget(q, i4, `"`)
404-
if i4 == -1 {
405-
return &t, i3
406-
}
407-
}
408-
i2 = i4
409-
}
410-
411-
i3 = parseExact(q, i2, "(")
412-
if i3 == -1 {
413-
return &t, i2
414-
}
415-
416-
// parse columns
417-
t.Cols = make(map[string]*Column)
418-
for {
419-
i = omitWhitespace(q, i3)
420-
if parseExact(q, i, "INDEX") != -1 {
421-
return &t, 0
422-
}
423-
i, col := parseColumn(q, i3)
424-
if i == -1 {
425-
return &t, i3
426-
}
427-
t.Cols[col.Name] = &col
428-
i2 = omitWhitespace(q, i)
429-
if i2 == -1 {
430-
return &t, i
431-
}
432-
if q[i2] == ')' {
433-
return &t, 0
434-
} else if q[i2] != ',' {
435-
return &t, i2
436-
} else {
437-
i3 = omitWhitespace(q, i2+1)
438-
if i3 == -1 {
439-
return &t, i2 + 1
440-
} else if q[i3] == ')' {
441-
return &t, 0
442-
} else {
443-
i3 = i2 + 1
444-
}
445-
}
446-
}
447-
}

platform/clickhouse/schema.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -311,16 +311,6 @@ func NewType(value any, valueOrigin string) (Type, error) {
311311
return nil, fmt.Errorf("unsupported type '%T' of value: %v (origin: %s)", value, value, valueOrigin)
312312
}
313313

314-
func NewTable(createTableQuery string, config *ChTableConfig) (*Table, error) {
315-
t, i := ParseCreateTable(createTableQuery)
316-
t.Config = config
317-
if i == 0 {
318-
return t, nil
319-
} else {
320-
return t, fmt.Errorf("error parsing query at character %d, query: %s", i, createTableQuery)
321-
}
322-
}
323-
324314
// NewEmptyTable is used only in tests
325315
func NewEmptyTable(tableName string) *Table {
326316
return &Table{Name: tableName, Config: NewChTableConfigNoAttrs()}

platform/clickhouse/schema_test.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,16 @@ import (
1010

1111
func TestGetDateTimeType(t *testing.T) {
1212
ctx := context.Background()
13-
table, err := NewTable(`CREATE TABLE table (
14-
"timestamp1" DateTime,
15-
"timestamp2" DateTime('UTC'),
16-
"timestamp64_1" DateTime64,
17-
"timestamp64_2" DateTime64(3, 'UTC') ) ENGINE = Memory`, NewChTableConfigTimestampStringAttr())
18-
assert.NoError(t, err)
13+
table := Table{
14+
Name: "table",
15+
Cols: map[string]*Column{
16+
"timestamp1": {Name: "timestamp1", Type: NewBaseType("DateTime")},
17+
"timestamp2": {Name: "timestamp2", Type: NewBaseType("DateTime('UTC')")},
18+
"timestamp64_1": {Name: "timestamp64_1", Type: NewBaseType("DateTime64")},
19+
"timestamp64_2": {Name: "timestamp64_2", Type: NewBaseType("DateTime64(3, 'UTC')")},
20+
},
21+
Config: NewChTableConfigTimestampStringAttr(),
22+
}
1923
assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "timestamp1", true))
2024
assert.Equal(t, DateTime, table.GetDateTimeType(ctx, "timestamp2", true))
2125
assert.Equal(t, DateTime64, table.GetDateTimeType(ctx, "timestamp64_1", true))

0 commit comments

Comments
 (0)