Skip to content

Commit aff6a91

Browse files
committed
fix(clickhouse): do not double-escape JSON-encoded fields
1 parent 846df38 commit aff6a91

File tree

4 files changed

+171
-67
lines changed

4 files changed

+171
-67
lines changed

materialize-clickhouse/.snapshots/TestSQLGeneration

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -108,34 +108,33 @@ SELECT 0::Int32, r.flow_document
108108
--- Begin key_value queryLoadTableNoFlowDocument ---
109109

110110
SELECT 0::Int32,
111-
toJSONString(map(
112-
'key1', r.key1,
113-
'key2', r.key2,
114-
'key!binary', r.`key!binary`,
115-
'array', r.`array`,
116-
'binary', r.binary,
117-
'boolean', r.boolean,
118-
'field_with_projection', r.field_with_projection,
119-
'integer', r.integer,
120-
'integerGt64Bit', r.integerGt64Bit,
121-
'integerWithUserDDL', r.integerWithUserDDL,
122-
'multiple', r.multiple,
123-
'nonAsciiκόσμε', r.`nonAsciiκόσμε`,
124-
'number', r.number,
125-
'numberCastToString', toString(r.numberCastToString),
126-
'object', r.object,
127-
'string', r.string,
128-
'stringInteger', r.stringInteger,
129-
'stringInteger39Chars', r.stringInteger39Chars,
130-
'stringInteger66Chars', r.stringInteger66Chars,
131-
'stringNumber', toString(r.stringNumber)
132-
)) AS flow_document
111+
concat('{',
112+
toJSONString('key1'), ':', ifNull(toJSONString(r.key1), 'null'), ',',
113+
toJSONString('key2'), ':', ifNull(toJSONString(r.key2), 'null'), ',',
114+
toJSONString('key!binary'), ':', ifNull(toJSONString(r.`key!binary`), 'null'), ',',
115+
toJSONString('array'), ':', ifNull(r.`array`, 'null'), ',',
116+
toJSONString('binary'), ':', ifNull(toJSONString(r.binary), 'null'), ',',
117+
toJSONString('boolean'), ':', ifNull(toJSONString(r.boolean), 'null'), ',',
118+
toJSONString('field_with_projection'), ':', ifNull(toJSONString(r.field_with_projection), 'null'), ',',
119+
toJSONString('integer'), ':', ifNull(toJSONString(r.integer), 'null'), ',',
120+
toJSONString('integerGt64Bit'), ':', ifNull(toJSONString(r.integerGt64Bit), 'null'), ',',
121+
toJSONString('integerWithUserDDL'), ':', ifNull(toJSONString(r.integerWithUserDDL), 'null'), ',',
122+
toJSONString('multiple'), ':', ifNull(r.multiple, 'null'), ',',
123+
toJSONString('nonAsciiκόσμε'), ':', ifNull(toJSONString(r.`nonAsciiκόσμε`), 'null'), ',',
124+
toJSONString('number'), ':', ifNull(toJSONString(r.number), 'null'), ',',
125+
toJSONString('numberCastToString'), ':', ifNull(toJSONString(toString(r.numberCastToString)), 'null'), ',',
126+
toJSONString('object'), ':', ifNull(r.object, 'null'), ',',
127+
toJSONString('string'), ':', ifNull(toJSONString(r.string), 'null'), ',',
128+
toJSONString('stringInteger'), ':', ifNull(toJSONString(r.stringInteger), 'null'), ',',
129+
toJSONString('stringInteger39Chars'), ':', ifNull(toJSONString(r.stringInteger39Chars), 'null'), ',',
130+
toJSONString('stringInteger66Chars'), ':', ifNull(toJSONString(r.stringInteger66Chars), 'null'), ',',
131+
toJSONString('stringNumber'), ':', ifNull(toJSONString(toString(r.stringNumber)), 'null')
132+
, '}') AS flow_document
133133
FROM key_value AS r FINAL
134134
JOIN flow_temp_load_0_key_value AS l
135135
ON l.key1 = r.key1
136136
AND l.key2 = r.key2
137137
AND l.`key!binary` = r.`key!binary`
138-
SETTINGS use_variant_as_common_type = 1
139138
--- End key_value queryLoadTableNoFlowDocument ---
140139

141140
--- Begin delta_updates queryLoadTableNoFlowDocument ---

materialize-clickhouse/.snapshots/TestSQLGenerationQuotedTableNames

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -108,34 +108,33 @@ SELECT 0::Int32, r.flow_document
108108
--- Begin `key_value-@你好-``-"especiál` queryLoadTableNoFlowDocument ---
109109

110110
SELECT 0::Int32,
111-
toJSONString(map(
112-
'key1', r.key1,
113-
'key2', r.key2,
114-
'key!binary', r.`key!binary`,
115-
'array', r.`array`,
116-
'binary', r.binary,
117-
'boolean', r.boolean,
118-
'field_with_projection', r.field_with_projection,
119-
'integer', r.integer,
120-
'integerGt64Bit', r.integerGt64Bit,
121-
'integerWithUserDDL', r.integerWithUserDDL,
122-
'multiple', r.multiple,
123-
'nonAsciiκόσμε', r.`nonAsciiκόσμε`,
124-
'number', r.number,
125-
'numberCastToString', toString(r.numberCastToString),
126-
'object', r.object,
127-
'string', r.string,
128-
'stringInteger', r.stringInteger,
129-
'stringInteger39Chars', r.stringInteger39Chars,
130-
'stringInteger66Chars', r.stringInteger66Chars,
131-
'stringNumber', toString(r.stringNumber)
132-
)) AS flow_document
111+
concat('{',
112+
toJSONString('key1'), ':', ifNull(toJSONString(r.key1), 'null'), ',',
113+
toJSONString('key2'), ':', ifNull(toJSONString(r.key2), 'null'), ',',
114+
toJSONString('key!binary'), ':', ifNull(toJSONString(r.`key!binary`), 'null'), ',',
115+
toJSONString('array'), ':', ifNull(r.`array`, 'null'), ',',
116+
toJSONString('binary'), ':', ifNull(toJSONString(r.binary), 'null'), ',',
117+
toJSONString('boolean'), ':', ifNull(toJSONString(r.boolean), 'null'), ',',
118+
toJSONString('field_with_projection'), ':', ifNull(toJSONString(r.field_with_projection), 'null'), ',',
119+
toJSONString('integer'), ':', ifNull(toJSONString(r.integer), 'null'), ',',
120+
toJSONString('integerGt64Bit'), ':', ifNull(toJSONString(r.integerGt64Bit), 'null'), ',',
121+
toJSONString('integerWithUserDDL'), ':', ifNull(toJSONString(r.integerWithUserDDL), 'null'), ',',
122+
toJSONString('multiple'), ':', ifNull(r.multiple, 'null'), ',',
123+
toJSONString('nonAsciiκόσμε'), ':', ifNull(toJSONString(r.`nonAsciiκόσμε`), 'null'), ',',
124+
toJSONString('number'), ':', ifNull(toJSONString(r.number), 'null'), ',',
125+
toJSONString('numberCastToString'), ':', ifNull(toJSONString(toString(r.numberCastToString)), 'null'), ',',
126+
toJSONString('object'), ':', ifNull(r.object, 'null'), ',',
127+
toJSONString('string'), ':', ifNull(toJSONString(r.string), 'null'), ',',
128+
toJSONString('stringInteger'), ':', ifNull(toJSONString(r.stringInteger), 'null'), ',',
129+
toJSONString('stringInteger39Chars'), ':', ifNull(toJSONString(r.stringInteger39Chars), 'null'), ',',
130+
toJSONString('stringInteger66Chars'), ':', ifNull(toJSONString(r.stringInteger66Chars), 'null'), ',',
131+
toJSONString('stringNumber'), ':', ifNull(toJSONString(toString(r.stringNumber)), 'null')
132+
, '}') AS flow_document
133133
FROM `key_value-@你好-``-"especiál` AS r FINAL
134134
JOIN `flow_temp_load_0_key_value-@你好-``-"especiál` AS l
135135
ON l.key1 = r.key1
136136
AND l.key2 = r.key2
137137
AND l.`key!binary` = r.`key!binary`
138-
SETTINGS use_variant_as_common_type = 1
139138
--- End `key_value-@你好-``-"especiál` queryLoadTableNoFlowDocument ---
140139

141140
--- Begin `delta_updates-@你好-``-"especiál` queryLoadTableNoFlowDocument ---

materialize-clickhouse/driver_test.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,3 +1100,106 @@ func TestMovePartitionMissingTarget(t *testing.T) {
11001100
require.ErrorAs(t, moveErr, &exc)
11011101
require.EqualValues(t, chproto.ErrUnknownTable, exc.Code)
11021102
}
1103+
1104+
// TestNoFlowDocumentObjectColumns verifies that root-level OBJECT, ARRAY, and
1105+
// MULTIPLE columns are correctly embedded as JSON objects/arrays (not
1106+
// double-encoded as strings) when reconstructed by the
1107+
// queryLoadTableNoFlowDocument template's toJSONString(map(...)).
1108+
func TestNoFlowDocumentObjectColumns(t *testing.T) {
1109+
var cfg = testConfig()
1110+
cfg.Advanced.NoFlowDocument = true
1111+
var ctx = t.Context()
1112+
var dialect = clickHouseDialect(cfg.Database)
1113+
var tpls = renderTemplates(dialect, cfg.HardDelete)
1114+
var tableName = "test_no_flow_doc_object"
1115+
1116+
var shape = sql.TableShape{
1117+
Path: sql.TablePath{tableName},
1118+
Binding: 0,
1119+
Keys: []sql.Projection{
1120+
{
1121+
Projection: pf.Projection{
1122+
Ptr: "/id",
1123+
Field: "id",
1124+
Inference: pf.Inference{
1125+
Types: []string{"string"},
1126+
Exists: pf.Inference_MUST,
1127+
String_: &pf.Inference_String{},
1128+
},
1129+
},
1130+
},
1131+
},
1132+
Values: []sql.Projection{
1133+
{
1134+
Projection: pf.Projection{
1135+
Ptr: "/_meta",
1136+
Field: "_meta",
1137+
Inference: pf.Inference{
1138+
Types: []string{"object"},
1139+
Exists: pf.Inference_MUST,
1140+
},
1141+
},
1142+
},
1143+
{
1144+
Projection: pf.Projection{
1145+
Ptr: "/tags",
1146+
Field: "tags",
1147+
Inference: pf.Inference{
1148+
Types: []string{"array"},
1149+
Exists: pf.Inference_MUST,
1150+
},
1151+
},
1152+
},
1153+
{
1154+
Projection: pf.Projection{
1155+
Ptr: "/extra",
1156+
Field: "extra",
1157+
Inference: pf.Inference{
1158+
Types: []string{"integer", "object", "boolean"},
1159+
Exists: pf.Inference_MUST,
1160+
},
1161+
},
1162+
},
1163+
flowPublishedAtProjection,
1164+
},
1165+
}
1166+
shape.Values[3].Ptr = "/flow_published_at"
1167+
1168+
table, err := sql.ResolveTable(shape, dialect)
1169+
require.NoError(t, err)
1170+
1171+
b, storeConn, loadConn := setupTable(t, ctx, cfg, dialect, tpls, table, tableName)
1172+
1173+
storeRows(t, ctx, storeConn, b, cfg.Database, []any{
1174+
"k1", // id (key)
1175+
`{"op":"c","extra":"stuff"}`, // _meta (object → String)
1176+
`["tag1","tag2"]`, // tags (array → String)
1177+
`{"nested":true}`, // extra (multiple → String)
1178+
testTime, // flow_published_at
1179+
})
1180+
1181+
docs := loadDocuments(t, ctx, loadConn, b, []any{"k1"})
1182+
require.Len(t, docs, 1)
1183+
1184+
var parsed map[string]json.RawMessage
1185+
require.NoError(t, json.Unmarshal([]byte(docs[0]), &parsed))
1186+
1187+
metaRaw := string(parsed["_meta"])
1188+
tagsRaw := string(parsed["tags"])
1189+
extraRaw := string(parsed["extra"])
1190+
1191+
// OBJECT columns must be embedded as JSON objects, not double-encoded strings.
1192+
require.Truef(t, metaRaw[0] == '{',
1193+
"expected _meta to be a JSON object, got: %s", metaRaw)
1194+
require.JSONEq(t, `{"op":"c","extra":"stuff"}`, metaRaw)
1195+
1196+
// ARRAY columns must be embedded as JSON arrays.
1197+
require.Truef(t, tagsRaw[0] == '[',
1198+
"expected tags to be a JSON array, got: %s", tagsRaw)
1199+
require.JSONEq(t, `["tag1","tag2"]`, tagsRaw)
1200+
1201+
// MULTIPLE columns must be embedded as their actual JSON type.
1202+
require.Truef(t, extraRaw[0] == '{',
1203+
"expected extra to be a JSON object, got: %s", extraRaw)
1204+
require.JSONEq(t, `{"nested":true}`, extraRaw)
1205+
}

materialize-clickhouse/sqlgen.go

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -257,44 +257,47 @@ SELECT {{ $.Binding }}::Int32, r.{{$.Document.Identifier}}
257257
{{ end -}}
258258
{{ end }}
259259
260-
-- Templated query for no_flow_document mode - reconstructs JSON from root-level columns
261-
-- using toJSONString(map(...)) with SETTINGS use_variant_as_common_type = 1 to allow
262-
-- mixed-type map values.
263-
--
264-
-- toJSONString(map(...)) handles most types correctly, including Nullable columns
265-
-- (serialized as null). Four types need pre-processing before going into the map:
266-
-- - DateTime64: toJSONString formats as "YYYY-MM-DD HH:MM:SS.ffffff" which isn't
267-
-- RFC3339; we formatDateTime and append 'Z' to produce a proper string value.
268-
-- - STRING_NUMBER: stored as Float64 but must appear as a JSON string; toString()
269-
-- converts it so the map serializes it quoted.
270-
271-
{{ define "mapValue" -}}
260+
-- Templated query for no_flow_document mode - reconstructs JSON from root-level
261+
-- columns using concat(). Each column value is individually serialized:
262+
-- - OBJECT, ARRAY, MULTIPLE: stored as String containing valid JSON, embedded
263+
-- directly via ifNull(col, 'null') so they appear as proper JSON
264+
-- objects/arrays rather than double-encoded strings.
265+
-- - DateTime64: formatDateTime produces RFC3339, wrapped in toJSONString for
266+
-- proper quoting.
267+
-- - STRING_NUMBER: stored as Float64 but must appear as a JSON string;
268+
-- toString() converts before toJSONString quotes it.
269+
-- - All other types: toJSONString handles correct JSON serialization
270+
-- (quoting strings, bare numbers/bools).
271+
-- Nullable columns use ifNull(..., 'null') to emit JSON null.
272+
273+
{{ define "concatValue" -}}
272274
{{ $ident := printf "%s.%s" $.Alias $.Identifier }}
273-
{{- if and (eq $.AsFlatType "string") (eq $.Format "date-time") -}}
274-
formatDateTime({{ $ident }}, '%Y-%m-%dT%H:%i:%S.%f', 'UTC') || 'Z'
275+
{{- if or (eq $.AsFlatType "object") (eq $.AsFlatType "array") (eq $.AsFlatType "multiple") -}}
276+
ifNull({{ $ident }}, 'null')
277+
{{- else if and (eq $.AsFlatType "string") (eq $.Format "date-time") -}}
278+
ifNull(toJSONString(formatDateTime({{ $ident }}, '%Y-%m-%dT%H:%i:%S.%f', 'UTC') || 'Z'), 'null')
275279
{{- else if eq $.AsFlatType "string_number" -}}
276-
toString({{ $ident }})
280+
ifNull(toJSONString(toString({{ $ident }})), 'null')
277281
{{- else -}}
278-
{{ $ident }}
282+
ifNull(toJSONString({{ $ident }}), 'null')
279283
{{- end -}}
280284
{{- end }}
281285
282286
{{ define "queryLoadTableNoFlowDocument" }}
283287
{{ if not $.DeltaUpdates -}}
284288
SELECT {{ $.Binding }}::Int32,
285-
toJSONString(map(
289+
concat('{',
286290
{{- range $i, $col := $.RootLevelColumns -}}
287-
{{- if $i }},{{ end }}
288-
{{ Literal $col.Field }}, {{ template "mapValue" (ColumnWithAlias $col "r") }}
291+
{{- if $i }}, ',', {{ end }}
292+
toJSONString({{ Literal $col.Field }}), ':', {{ template "concatValue" (ColumnWithAlias $col "r") }}
289293
{{- end }}
290-
)) AS flow_document
294+
, '}') AS flow_document
291295
FROM {{$.Identifier}} AS r FINAL
292296
JOIN {{ template "loadTableName" . }} AS l
293297
{{- range $ind, $key := $.Keys }}
294298
{{ if $ind }} AND {{ else }} ON {{ end -}}
295299
l.{{$key.Identifier}} = r.{{$key.Identifier}}
296300
{{- end }}
297-
SETTINGS use_variant_as_common_type = 1
298301
{{ else -}}
299302
SELECT * FROM (SELECT -1::Int32, ''::String LIMIT 0) as nodoc
300303
{{ end -}}

0 commit comments

Comments
 (0)