Skip to content

Commit 02e3325

Browse files
committed
add sql parser
1 parent c26de5c commit 02e3325

File tree

4 files changed

+283
-147
lines changed

4 files changed

+283
-147
lines changed

bttest/cmv.go

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
package bttest
22

33
import (
4-
"fmt"
54
"log"
6-
"regexp"
7-
"strconv"
85
"strings"
96
)
107

@@ -29,92 +26,6 @@ type CMVConfig struct {
2926
AppendSourceKey bool `json:"append_source_key,omitempty"`
3027
}
3128

32-
// ParseCMVConfigFromSQL attempts to extract a CMVConfig from a CMV SQL query.
33-
// It parses the standard Bigtable CMV SQL pattern:
34-
//
35-
// SELECT SPLIT(_key, '#')[SAFE_OFFSET(n)] AS col, ..., _key AS source_key, family AS family
36-
// FROM `table_name`
37-
// ORDER BY col1, col2, ..., source_key
38-
//
39-
// If _key (or the alias given to it in SELECT) appears in ORDER BY, AppendSourceKey is set to true.
40-
func ParseCMVConfigFromSQL(viewID, query string) (*CMVConfig, error) {
41-
cfg := &CMVConfig{
42-
ViewID: viewID,
43-
}
44-
45-
fromRe := regexp.MustCompile("(?i)FROM\\s+`([^`]+)`")
46-
fromMatch := fromRe.FindStringSubmatch(query)
47-
if fromMatch == nil {
48-
return nil, fmt.Errorf("could not parse FROM clause in CMV query")
49-
}
50-
cfg.SourceTable = fromMatch[1]
51-
52-
// Extract column aliases and their SAFE_OFFSET indices from SELECT.
53-
offsetRe := regexp.MustCompile(`SPLIT\(_key,\s*'([^']+)'\)\[SAFE_OFFSET\((\d+)\)\].*?\bAS\s+\w+\)\s+AS\s+(\w+)|SPLIT\(_key,\s*'([^']+)'\)\[SAFE_OFFSET\((\d+)\)\]\s+AS\s+(\w+)`)
54-
matches := offsetRe.FindAllStringSubmatch(query, -1)
55-
if len(matches) == 0 {
56-
return nil, fmt.Errorf("could not parse SPLIT/SAFE_OFFSET expressions in CMV query")
57-
}
58-
59-
colMap := make(map[string]int) // alias → SAFE_OFFSET index
60-
for _, m := range matches {
61-
if m[1] != "" {
62-
cfg.KeySeparator = m[1]
63-
idx, _ := strconv.Atoi(m[2])
64-
colMap[m[3]] = idx
65-
} else {
66-
cfg.KeySeparator = m[4]
67-
idx, _ := strconv.Atoi(m[5])
68-
colMap[m[6]] = idx
69-
}
70-
}
71-
72-
// Detect "_key AS <alias>" in SELECT so we can recognise the alias in ORDER BY.
73-
var sourceKeyAlias string
74-
keyAliasRe := regexp.MustCompile(`(?i)\b_key\s+AS\s+(\w+)`)
75-
if m := keyAliasRe.FindStringSubmatch(query); m != nil {
76-
sourceKeyAlias = m[1]
77-
}
78-
79-
// Extract ORDER BY columns; _key or its alias sets AppendSourceKey.
80-
orderRe := regexp.MustCompile(`(?i)ORDER\s+BY\s+(.+)$`)
81-
orderMatch := orderRe.FindStringSubmatch(strings.TrimSpace(query))
82-
if orderMatch == nil {
83-
return nil, fmt.Errorf("could not parse ORDER BY clause in CMV query")
84-
}
85-
orderCols := strings.Split(orderMatch[1], ",")
86-
for _, col := range orderCols {
87-
col = strings.TrimSpace(col)
88-
if col == "_key" || (sourceKeyAlias != "" && col == sourceKeyAlias) {
89-
cfg.AppendSourceKey = true
90-
continue
91-
}
92-
idx, ok := colMap[col]
93-
if !ok {
94-
return nil, fmt.Errorf("ORDER BY column %q not found in SELECT", col)
95-
}
96-
cfg.KeyMapping = append(cfg.KeyMapping, idx)
97-
}
98-
99-
if len(cfg.KeyMapping) == 0 {
100-
return nil, fmt.Errorf("no key mapping columns found in ORDER BY")
101-
}
102-
103-
// Extract included column families from SELECT: any "<name> AS <name>" item whose source and alias are identical, excluding _key and the SPLIT-derived key columns.
104-
famRe := regexp.MustCompile(`(?:,\s*)(\w+)\s+AS\s+(\w+)`)
105-
famMatches := famRe.FindAllStringSubmatch(query, -1)
106-
for _, m := range famMatches {
107-
src, alias := m[1], m[2]
108-
if src == alias && src != "_key" {
109-
if _, isCol := colMap[src]; !isCol {
110-
cfg.IncludeFamilies = append(cfg.IncludeFamilies, src)
111-
}
112-
}
113-
}
114-
115-
return cfg, nil
116-
}
117-
11829
// cmvRegistry maps plain source table IDs to CMV definitions.
11930
// Lookups match by table ID suffix against fully-qualified table names.
12031
type cmvRegistry struct {

bttest/cmv_test.go

Lines changed: 28 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,23 @@ func newTestServerWithCMV(t *testing.T, configs []CMVConfig) *server {
3535
}
3636

3737
func TestCMVTransformKey(t *testing.T) {
38-
// key_mapping [3,4,1,2,0] + append_source_key:
39-
// CMV key = parts[3]#parts[4]#parts[1]#parts[2]#parts[0]#<full_source_key>
38+
// key_mapping [2,3,1,0] + append_source_key:
39+
// CMV key = parts[2]#parts[3]#parts[1]#parts[0]#<full_source_key>
4040
inst := &cmvInstance{
4141
config: CMVConfig{
42-
SourceTable: "events",
43-
ViewID: "events_by_account",
42+
SourceTable: "sensor_readings",
43+
ViewID: "readings_by_region",
4444
KeySeparator: "#",
45-
KeyMapping: []int{3, 4, 1, 2, 0},
45+
KeyMapping: []int{2, 3, 1, 0},
4646
AppendSourceKey: true,
4747
},
4848
}
4949

50-
sourceKey := "item-abc#9999999#type-x#region-a#account-42#src-01"
50+
sourceKey := "device-1#ts-100#us-east#user-42"
5151
got := inst.transformKey(sourceKey)
52-
// parts: [0]item-abc [1]9999999 [2]type-x [3]region-a [4]account-42 [5]src-01
53-
// mapped: parts[3]#parts[4]#parts[1]#parts[2]#parts[0] + full source key
54-
want := "region-a#account-42#9999999#type-x#item-abc#item-abc#9999999#type-x#region-a#account-42#src-01"
52+
// parts: [0]device-1 [1]ts-100 [2]us-east [3]user-42
53+
// mapped: parts[2]#parts[3]#parts[1]#parts[0] + full source key
54+
want := "us-east#user-42#ts-100#device-1#device-1#ts-100#us-east#user-42"
5555
assert.Equal(t, want, got)
5656
}
5757

@@ -261,8 +261,8 @@ func TestCMVDropRowRangeAll(t *testing.T) {
261261
func TestCMVTransformKeyOutOfBounds(t *testing.T) {
262262
inst := &cmvInstance{
263263
config: CMVConfig{
264-
SourceTable: "events",
265-
ViewID: "events_by_account",
264+
SourceTable: "my_table",
265+
ViewID: "my_view",
266266
KeySeparator: "#",
267267
KeyMapping: []int{0, 99}, // index 99 is out of bounds
268268
},
@@ -272,32 +272,6 @@ func TestCMVTransformKeyOutOfBounds(t *testing.T) {
272272
assert.Equal(t, "only#", got)
273273
}
274274

275-
func TestParseCMVConfigFromSQL(t *testing.T) {
276-
// Standard Bigtable CMV SQL pattern: plain SPLIT (no CAST), _key aliased,
277-
// alias appears in ORDER BY to set AppendSourceKey = true.
278-
query := `SELECT
279-
SPLIT(_key, '#')[SAFE_OFFSET(3)] AS region,
280-
SPLIT(_key, '#')[SAFE_OFFSET(4)] AS account_id,
281-
SPLIT(_key, '#')[SAFE_OFFSET(1)] AS timestamp,
282-
SPLIT(_key, '#')[SAFE_OFFSET(2)] AS type,
283-
SPLIT(_key, '#')[SAFE_OFFSET(0)] AS item_id,
284-
_key AS src_key,
285-
cf1 AS cf1,
286-
cf2 AS cf2
287-
FROM ` + "`events`" + `
288-
ORDER BY region, account_id, timestamp, type, item_id, src_key`
289-
290-
cfg, err := ParseCMVConfigFromSQL("events_by_account", query)
291-
require.NoError(t, err)
292-
assert.Equal(t, "events", cfg.SourceTable)
293-
assert.Equal(t, "events_by_account", cfg.ViewID)
294-
assert.Equal(t, "#", cfg.KeySeparator)
295-
assert.Equal(t, []int{3, 4, 1, 2, 0}, cfg.KeyMapping)
296-
assert.True(t, cfg.AppendSourceKey)
297-
assert.Contains(t, cfg.IncludeFamilies, "cf1")
298-
assert.Contains(t, cfg.IncludeFamilies, "cf2")
299-
}
300-
301275
func TestCreateMaterializedViewRPC(t *testing.T) {
302276
ctx := context.Background()
303277
dbFilename := newDBFile(t)
@@ -317,64 +291,60 @@ func TestCreateMaterializedViewRPC(t *testing.T) {
317291

318292
parent := "projects/test/instances/test"
319293
mvSQL := "SELECT\n" +
320-
" SPLIT(_key, '#')[SAFE_OFFSET(3)] AS region,\n" +
321-
" SPLIT(_key, '#')[SAFE_OFFSET(4)] AS account_id,\n" +
294+
" SPLIT(_key, '#')[SAFE_OFFSET(2)] AS region,\n" +
295+
" SPLIT(_key, '#')[SAFE_OFFSET(3)] AS user_id,\n" +
322296
" SPLIT(_key, '#')[SAFE_OFFSET(1)] AS ts,\n" +
323-
" SPLIT(_key, '#')[SAFE_OFFSET(2)] AS typ,\n" +
324-
" SPLIT(_key, '#')[SAFE_OFFSET(0)] AS item_id,\n" +
297+
" SPLIT(_key, '#')[SAFE_OFFSET(0)] AS device_id,\n" +
325298
" _key AS src_key,\n" +
326-
" cf1 AS cf1\n" +
327-
"FROM `events`\n" +
328-
"ORDER BY region, account_id, ts, typ, item_id, src_key"
299+
" data AS data\n" +
300+
"FROM `sensor_readings`\n" +
301+
"ORDER BY region, user_id, ts, device_id, src_key"
329302

330303
op, err := s.CreateMaterializedView(ctx, &btapb.CreateMaterializedViewRequest{
331304
Parent: parent,
332-
MaterializedViewId: "events_by_account",
305+
MaterializedViewId: "readings_by_region",
333306
MaterializedView: &btapb.MaterializedView{Query: mvSQL},
334307
})
335308
require.NoError(t, err)
336309
assert.True(t, op.Done)
337310

338-
// GetMaterializedView should return the stored view.
339311
mv, err := s.GetMaterializedView(ctx, &btapb.GetMaterializedViewRequest{
340-
Name: parent + "/materializedViews/events_by_account",
312+
Name: parent + "/materializedViews/readings_by_region",
341313
})
342314
require.NoError(t, err)
343-
assert.Equal(t, "events_by_account", mv.Name[strings.LastIndex(mv.Name, "/")+1:])
315+
assert.Equal(t, "readings_by_region", mv.Name[strings.LastIndex(mv.Name, "/")+1:])
344316
assert.Equal(t, mvSQL, mv.Query)
345317

346-
// ListMaterializedViews should include the new view.
347318
list, err := s.ListMaterializedViews(ctx, &btapb.ListMaterializedViewsRequest{Parent: parent})
348319
require.NoError(t, err)
349320
assert.Len(t, list.MaterializedViews, 1)
350321

351-
// CMV should now fire on writes to the source table.
322+
// CMV should fire on writes to the source table.
352323
_, err = s.CreateTable(ctx, &btapb.CreateTableRequest{
353324
Parent: parent,
354-
TableId: "events",
355-
Table: &btapb.Table{ColumnFamilies: map[string]*btapb.ColumnFamily{"cf1": {}}},
325+
TableId: "sensor_readings",
326+
Table: &btapb.Table{ColumnFamilies: map[string]*btapb.ColumnFamily{"data": {}}},
356327
})
357328
require.NoError(t, err)
358329

359330
_, err = s.MutateRow(ctx, &btpb.MutateRowRequest{
360-
TableName: parent + "/tables/events",
361-
RowKey: []byte("item-abc#9999999#type-x#region-a#account-42"),
331+
TableName: parent + "/tables/sensor_readings",
332+
RowKey: []byte("device-1#ts-100#us-east#user-42"),
362333
Mutations: []*btpb.Mutation{{
363334
Mutation: &btpb.Mutation_SetCell_{SetCell: &btpb.Mutation_SetCell{
364-
FamilyName: "cf1", ColumnQualifier: []byte("col1"), Value: []byte("v"),
335+
FamilyName: "data", ColumnQualifier: []byte("temp"), Value: []byte("72"),
365336
}},
366337
}},
367338
})
368339
require.NoError(t, err)
369340

370-
// The CMV shadow table should have been created and contain the re-keyed row.
371-
fqCMV := parent + "/tables/events_by_account"
341+
fqCMV := parent + "/tables/readings_by_region"
372342
cmvTbl := s.tables[fqCMV]
373343
require.NotNil(t, cmvTbl, "CMV shadow table should have been auto-created")
374344

375345
// DeleteMaterializedView should remove the view.
376346
_, err = s.DeleteMaterializedView(ctx, &btapb.DeleteMaterializedViewRequest{
377-
Name: parent + "/materializedViews/events_by_account",
347+
Name: parent + "/materializedViews/readings_by_region",
378348
})
379349
require.NoError(t, err)
380350
list, err = s.ListMaterializedViews(ctx, &btapb.ListMaterializedViewsRequest{Parent: parent})

0 commit comments

Comments
 (0)