Skip to content

Commit 101399d

Browse files
security: use parameterized queries for ClickHouse model comparison (#5563)
User-provided model names and provider values were directly concatenated into ClickHouse SQL queries via string interpolation, enabling SQL injection. Replaced string concatenation with ClickHouse parameterized query placeholders ({val_N:String}) bound through the client's safe parameter binding mechanism. Co-authored-by: kolega.dev <faizan@kolega.ai>
1 parent dbc143e commit 101399d

File tree

1 file changed

+56
-36
lines changed

1 file changed

+56
-36
lines changed

valhalla/jawn/src/managers/ModelComparisonManager.ts

Lines changed: 56 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,25 @@ interface CostQueryResult {
6262
export class ModelComparisonManager {
6363
constructor() {}
6464

65+
private buildModelNameParams(
66+
names: string[],
67+
offset: number
68+
): { placeholders: string; params: string[] } {
69+
const placeholders = names
70+
.map((_, i) => `{val_${offset + i}:String}`)
71+
.join(", ");
72+
return { placeholders: `(${placeholders})`, params: [...names] };
73+
}
74+
6575
public async getModelComparison(
6676
modelsToCompare: ModelsToCompare[]
6777
): Promise<Result<Model[], string>> {
6878
// Map over all models to get their info in parallel
6979
const modelResults = await Promise.all(
7080
modelsToCompare.map((model) => {
71-
const formattedModelNames = `('${model.names.join("','")}')`;
7281
return this.getModelInfo(
7382
model.parent,
74-
formattedModelNames,
83+
model.names,
7584
model.provider
7685
);
7786
})
@@ -90,11 +99,19 @@ export class ModelComparisonManager {
9099

91100
private async getModelInfo(
92101
model: string,
93-
formattedModelNames: string,
102+
modelNames: string[],
94103
provider: string
95104
): Promise<Result<Model, string>> {
96105
const normalizedProvider = provider.toUpperCase();
97106

107+
// Each query uses params [modelName0, ..., modelNameN, provider]
108+
// Provider is always the last parameter
109+
const providerParamIndex = modelNames.length;
110+
const { placeholders: modelPlaceholders, params: modelParams } =
111+
this.buildModelNameParams(modelNames, 0);
112+
const providerPlaceholder = `{val_${providerParamIndex}:String}`;
113+
const queryParams: string[] = [...modelParams, normalizedProvider];
114+
98115
const [
99116
metricsResult,
100117
geoResult,
@@ -103,24 +120,24 @@ export class ModelComparisonManager {
103120
geoTtftResult,
104121
] = await Promise.all([
105122
clickhouseDb.dbQuery<ModelMetricsQueryResult>(
106-
this.getModelMetricsQuery(formattedModelNames, normalizedProvider),
107-
[]
123+
this.getModelMetricsQuery(modelPlaceholders, providerPlaceholder),
124+
queryParams
108125
),
109126
clickhouseDb.dbQuery<GeographicLatencyQueryResult>(
110-
this.getGeographicLatencyQuery(formattedModelNames, normalizedProvider),
111-
[]
127+
this.getGeographicLatencyQuery(modelPlaceholders, providerPlaceholder),
128+
queryParams
112129
),
113130
clickhouseDb.dbQuery<TimeSeriesQueryResult>(
114-
this.getTimeSeriesQuery(formattedModelNames, normalizedProvider),
115-
[]
131+
this.getTimeSeriesQuery(modelPlaceholders, providerPlaceholder),
132+
queryParams
116133
),
117134
clickhouseDb.dbQuery<CostQueryResult>(
118-
this.getCostQuery(formattedModelNames, normalizedProvider),
119-
[]
135+
this.getCostQuery(modelPlaceholders, providerPlaceholder),
136+
queryParams
120137
),
121138
clickhouseDb.dbQuery<GeographicTtftQueryResult>(
122-
this.getGeographicTtftQuery(formattedModelNames, normalizedProvider),
123-
[]
139+
this.getGeographicTtftQuery(modelPlaceholders, providerPlaceholder),
140+
queryParams
124141
),
125142
]);
126143

@@ -210,14 +227,14 @@ export class ModelComparisonManager {
210227
}
211228

212229
private getModelMetricsQuery(
213-
formattedModelNames: string,
214-
provider: string
230+
modelPlaceholders: string,
231+
providerPlaceholder: string
215232
): string {
216233
return `
217234
SELECT
218235
model,
219236
provider,
220-
237+
221238
-- Latency stats
222239
avg(if(latency > 0 AND completion_tokens > 0, latency / completion_tokens * 1000, null)) as average_latency_per_1000_tokens,
223240
median(if(latency > 0 AND completion_tokens > 0, latency / completion_tokens * 1000, null)) as median_latency_per_1000_tokens,
@@ -227,7 +244,7 @@ export class ModelComparisonManager {
227244
quantile(0.95)(if(latency > 0 AND completion_tokens > 0, latency / completion_tokens * 1000, null)) as p95_latency_per_1000_tokens,
228245
quantile(0.99)(if(latency > 0 AND completion_tokens > 0, latency / completion_tokens * 1000, null)) as p99_latency_per_1000_tokens,
229246
countIf(latency > 0 AND completion_tokens > 0) as valid_latency_count,
230-
247+
231248
-- TTFT stats
232249
avg(if(time_to_first_token > 0, time_to_first_token, null)) as average_ttft,
233250
median(if(time_to_first_token > 0, time_to_first_token, null)) as median_ttft,
@@ -246,20 +263,20 @@ export class ModelComparisonManager {
246263
247264
-- Feedback counts
248265
countIf(has(scores, 'helicone-score-feedback')) as total_feedback,
249-
coalesce(countIf(has(scores, 'helicone-score-feedback') AND scores['helicone-score-feedback'] = 1) /
266+
coalesce(countIf(has(scores, 'helicone-score-feedback') AND scores['helicone-score-feedback'] = 1) /
250267
nullIf(total_feedback, 0), 0) as positive_percentage,
251-
coalesce(countIf(has(scores, 'helicone-score-feedback') AND scores['helicone-score-feedback'] = 0) /
268+
coalesce(countIf(has(scores, 'helicone-score-feedback') AND scores['helicone-score-feedback'] = 0) /
252269
nullIf(total_feedback, 0), 0) as negative_percentage
253270
FROM request_response_rmt
254-
WHERE model IN ${formattedModelNames}
255-
AND upper(provider) = '${provider}'
271+
WHERE model IN ${modelPlaceholders}
272+
AND upper(provider) = ${providerPlaceholder}
256273
AND request_created_at >= now() - INTERVAL 30 DAY
257274
GROUP BY model, provider`;
258275
}
259276

260277
private getGeographicLatencyQuery(
261-
formattedModelNames: string,
262-
provider: string
278+
modelPlaceholders: string,
279+
providerPlaceholder: string
263280
): string {
264281
return `
265282
SELECT
@@ -273,34 +290,34 @@ export class ModelComparisonManager {
273290
quantile(0.99)(if(completion_tokens > 0, latency / completion_tokens * 1000, null)) as p99_latency_per_1000_tokens,
274291
median(if(completion_tokens > 0, (latency / completion_tokens) * 1000, null)) as median_latency_per_1000_tokens
275292
FROM request_response_rmt
276-
WHERE model IN ${formattedModelNames}
277-
AND upper(provider) = '${provider}'
293+
WHERE model IN ${modelPlaceholders}
294+
AND upper(provider) = ${providerPlaceholder}
278295
AND request_created_at >= now() - INTERVAL 30 DAY
279296
AND country_code != ''
280297
AND latency > 0
281298
GROUP BY country_code`;
282299
}
283300

284301
private getGeographicTtftQuery(
285-
formattedModelNames: string,
286-
provider: string
302+
modelPlaceholders: string,
303+
providerPlaceholder: string
287304
): string {
288305
return `
289306
SELECT
290307
country_code,
291308
median(time_to_first_token) as median_ttft
292309
FROM request_response_rmt
293-
WHERE model IN ${formattedModelNames}
294-
AND upper(provider) = '${provider}'
310+
WHERE model IN ${modelPlaceholders}
311+
AND upper(provider) = ${providerPlaceholder}
295312
AND request_created_at >= now() - INTERVAL 30 DAY
296313
AND country_code != ''
297314
AND time_to_first_token > 0
298315
GROUP BY country_code`;
299316
}
300317

301318
private getTimeSeriesQuery(
302-
formattedModelNames: string,
303-
provider: string
319+
modelPlaceholders: string,
320+
providerPlaceholder: string
304321
): string {
305322
return `
306323
WITH daily_stats AS (
@@ -312,8 +329,8 @@ export class ModelComparisonManager {
312329
countIf(status < 500) / count(*) as success_rate,
313330
countIf(status >= 500) / count(*) as error_rate
314331
FROM request_response_rmt
315-
WHERE model IN ${formattedModelNames}
316-
AND upper(provider) = '${provider}'
332+
WHERE model IN ${modelPlaceholders}
333+
AND upper(provider) = ${providerPlaceholder}
317334
AND request_created_at >= now() - INTERVAL 30 DAY
318335
GROUP BY timestamp
319336
HAVING total_requests >= 100
@@ -332,14 +349,17 @@ export class ModelComparisonManager {
332349
STEP INTERVAL 1 DAY`;
333350
}
334351

335-
private getCostQuery(formattedModelNames: string, provider: string): string {
352+
private getCostQuery(
353+
modelPlaceholders: string,
354+
providerPlaceholder: string
355+
): string {
336356
return `
337357
SELECT
338358
avg(prompt_tokens) as avg_input_tokens,
339359
avg(completion_tokens) as avg_output_tokens
340360
FROM request_response_rmt
341-
WHERE model IN ${formattedModelNames}
342-
AND upper(provider) = '${provider}'
361+
WHERE model IN ${modelPlaceholders}
362+
AND upper(provider) = ${providerPlaceholder}
343363
AND request_created_at >= now() - INTERVAL 30 DAY`;
344364
}
345365
}

0 commit comments

Comments
 (0)