Skip to content

Commit 165c8ed

Browse files
authored
set up PT x vLLM regression config (#7684)
Summary: As the title says, this connects detected regressions to the newly created GitHub issue. It uses 1.20 and 0.8 as the thresholds.

Test Plan: Ran locally with the following command:
```
python aws/lambda/benchmark_regression_summary_report/lambda_function.py --clickhouse-endpoint ${CLICKHOUSE_ENDPOINT} --clickhouse-username ${DEV_USERNAME} --clickhouse-password ${CLICKHOUSE_PASSWORD} --config-id pytorch_x_vllm_benchmark
```
Ran it both yesterday and today; today's run was sufficient to trigger the regression thresholds, so 20% seems appropriate here.

Reviewers: Subscribers: Tasks: Tags:
1 parent 6232f61 commit 165c8ed

File tree

1 file changed

+93
-0
lines changed
  • aws/lambda/benchmark_regression_summary_report/common

1 file changed

+93
-0
lines changed

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,104 @@
291291
),
292292
)
293293

294+
# Regression-report configuration for the "PyTorch x vLLM benchmark".
# It declares where the time-series data is fetched from (HUD benchmark API),
# the per-metric regression policies, and where notifications are posted.
PYTORCH_X_VLLM_BENCHMARK_CONFIG = BenchmarkConfig(
295+
name="PyTorch x vLLM Benchmark Regression",
296+
id="pytorch_x_vllm_benchmark",
297+
# Data source: the HUD time-series endpoint, queried with the template below.
source=BenchmarkApiSource(
298+
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
299+
type="benchmark_time_series_api",
300+
# JSON request template restricted to the "main" branch of pytorch/pytorch.
# "{{ startTime }}" / "{{ stopTime }}" are placeholders, presumably filled in
# by the caller before the request is sent -- TODO confirm against the renderer.
api_endpoint_params_template="""
301+
{
302+
"name": "pytorch_x_vllm_benchmark",
303+
"query_params": {
304+
"mode": "",
305+
"branches": ["main"],
306+
"repo": "pytorch/pytorch",
307+
"device": "",
308+
"benchmarkName": "PyTorch x vLLM benchmark",
309+
"startTime": "{{ startTime }}",
310+
"stopTime": "{{ stopTime }}"
311+
},
312+
"response_formats":["time_series"]
313+
}
314+
""",
315+
),
316+
# Link to the HUD dashboard page for this benchmark.
hud_info={
317+
"url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_x_vllm_benchmark",
318+
},
319+
policy=Policy(
320+
# Frequency of 1 day; baseline and comparison windows are both 3 days.
# NOTE(review): how the two windows are positioned relative to each other is
# decided by RangeConfig / DayRangeWindow, which are not visible here.
frequency=Frequency(value=1, unit="days"),
321+
range=RangeConfig(
322+
baseline=DayRangeWindow(value=3),
323+
comparison=DayRangeWindow(value=3),
324+
),
325+
# Per-metric policies, all aggregated against the baseline with "median".
# The latency-style metrics (latency, ITL, TPOT, TTFT) use "less_equal" with
# threshold 1.20 -- presumably "acceptable while value <= 1.20 x baseline";
# verify against RegressionPolicy.
metrics={
326+
"latency": RegressionPolicy(
327+
name="latency",
328+
condition="less_equal",
329+
threshold=1.20,
330+
baseline_aggregation="median",
331+
),
332+
"median_itl_ms": RegressionPolicy(
333+
name="median_itl_ms",
334+
condition="less_equal",
335+
threshold=1.20,
336+
baseline_aggregation="median",
337+
),
338+
"median_tpot_ms": RegressionPolicy(
339+
name="median_tpot_ms",
340+
condition="less_equal",
341+
threshold=1.20,
342+
baseline_aggregation="median",
343+
),
344+
"median_ttft_ms": RegressionPolicy(
345+
name="median_ttft_ms",
346+
condition="less_equal",
347+
threshold=1.20,
348+
baseline_aggregation="median",
349+
),
350+
# The throughput-style metrics use the opposite condition, "greater_equal",
# with threshold 0.8 -- presumably "acceptable while value >= 0.8 x baseline";
# verify against RegressionPolicy.
"requests_per_second": RegressionPolicy(
351+
name="requests_per_second",
352+
condition="greater_equal",
353+
threshold=0.8,
354+
baseline_aggregation="median",
355+
),
356+
"tokens_per_second": RegressionPolicy(
357+
name="tokens_per_second",
358+
condition="greater_equal",
359+
threshold=0.8,
360+
baseline_aggregation="median",
361+
),
362+
},
363+
# Notification target: GitHub issue 7676 in pytorch/test-infra, with a
# "device_arch" condition listing two CUDA architectures (H100 and B200).
notification_config={
364+
"configs": [
365+
{
366+
"type": "github",
367+
"repo": "pytorch/test-infra",
368+
"issue": "7676",
369+
"condition": {
370+
"type": "device_arch",
371+
"device_arches": [
372+
{"device": "cuda", "arch": "NVIDIA H100 80GB HBM3"},
373+
{"device": "cuda", "arch": "NVIDIA B200"},
374+
],
375+
},
376+
}
377+
]
378+
},
379+
),
380+
# NOTE(review): the meaning of report_level="no_regression" is defined by
# ReportConfig, which is not visible in this diff -- confirm before changing.
report_config=ReportConfig(
381+
report_level="no_regression",
382+
),
383+
)
384+
385+
294386
# Registry ("config book") mapping a string key to each benchmark regression
# config. The "pytorch_x_vllm_benchmark" key is the same string as the `id` of
# PYTORCH_X_VLLM_BENCHMARK_CONFIG.
BENCHMARK_REGRESSION_CONFIG = BenchmarkRegressionConfigBook(
295387
configs={
296388
"compiler_regression": COMPILER_BENCHMARK_CONFIG,
297389
"pytorch_operator_microbenchmark": PYTORCH_OPERATOR_MICROBENCH_CONFIG,
298390
"pytorch_helion": PYTORCH_HELION_CONFIG,
391+
"pytorch_x_vllm_benchmark": PYTORCH_X_VLLM_BENCHMARK_CONFIG,
299392
"torchao_micro_api_benchmark": TORCHAO_MICRO_API_CONFIG,
300393
}
301394
)

0 commit comments

Comments
 (0)