Skip to content

Commit e64fe7a

Browse files
committed
benchmark: fix TTFT unit label and achieved RPS extraction
- TTFT mean in PR comment showed "ms" but value was already divided by 1000 (should be "s")
- Achieved RPS was always 0.00 because GuideLLM may not expose rate.completed_rate; add fallback: completed_requests / duration

Made-with: Cursor
1 parent 3b0ceb2 commit e64fe7a

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

.github/workflows/ci-benchmark.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,7 @@ jobs:
10741074
| **Avg vLLM Queue Depth** | ${hpa.avg_queue_depth.toFixed(1)} | **${wva.avg_queue_depth.toFixed(1)}** | ${delta(hpa.avg_queue_depth, wva.avg_queue_depth)} |
10751075
| **Avg EPP Queue Depth** | ${(hpa.avg_epp_queue_depth||0).toFixed(1)} | **${(wva.avg_epp_queue_depth||0).toFixed(1)}** | ${delta(hpa.avg_epp_queue_depth||0, wva.avg_epp_queue_depth||0)} |
10761076
| **Avg KV Cache** | ${hpa.avg_kv_cache.toFixed(3)} | ${wva.avg_kv_cache.toFixed(3)} | ${delta(hpa.avg_kv_cache, wva.avg_kv_cache)} |
1077-
| **TTFT mean** | ${fmtM(hpa.ttft, 1000)}ms | **${fmtM(wva.ttft, 1000)}ms** | ${hpa.ttft && wva.ttft ? delta(hpa.ttft.mean, wva.ttft.mean) : '—'} |
1077+
| **TTFT mean** | ${fmtM(hpa.ttft, 1000)}s | **${fmtM(wva.ttft, 1000)}s** | ${hpa.ttft && wva.ttft ? delta(hpa.ttft.mean, wva.ttft.mean) : '—'} |
10781078
| **TTFT p50** | ${fmtP(hpa.ttft, 'p50', 1000)}s | **${fmtP(wva.ttft, 'p50', 1000)}s** | — |
10791079
| **TTFT p99** | ${fmtP(hpa.ttft, 'p99', 1000)}s | **${fmtP(wva.ttft, 'p99', 1000)}s** | — |
10801080
| **ITL mean** | ${fmtM(hpa.itl, 1, 2)}ms | **${fmtM(wva.itl, 1, 2)}ms** | ${hpa.itl && wva.itl ? delta(hpa.itl.mean, wva.itl.mean) : '—'} |

test/benchmark/prefill_heavy_benchmark_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
651651
}
652652

653653
// Extract error counts and achieved RPS from GuideLLM output
654-
var errorCount, incompleteCount int
654+
var errorCount, incompleteCount, completedCount int
655655
var achievedRPS float64
656656
if guidellmRaw != nil {
657657
var parsed map[string]interface{}
@@ -666,6 +666,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
666666
if f, ok := rt["incomplete"].(float64); ok {
667667
incompleteCount = int(f)
668668
}
669+
if f, ok := rt["successful"].(float64); ok {
670+
completedCount = int(f)
671+
}
669672
}
670673
}
671674
if rateObj, ok := bm["rate"].(map[string]interface{}); ok {
@@ -677,6 +680,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
677680
}
678681
}
679682
}
683+
if achievedRPS == 0 && completedCount > 0 && loadDuration > 0 {
684+
achievedRPS = float64(completedCount) / loadDuration
685+
}
680686

681687
result := PrefillResult{
682688
AutoscalerType: autoscalerType,

0 commit comments

Comments
 (0)