benchmark: fix TTFT unit label and achieved RPS extraction

kahilam · kahilam · commit e64fe7a67d26 · 2026-04-03T20:05:35.000-07:00
- The TTFT mean row in the PR comment was labeled "ms", but the value is
  already divided by 1000, so the unit is "s" (as in the p50/p99 rows)
- Achieved RPS was reported as 0.00 whenever GuideLLM did not expose
  rate.completed_rate; fall back to successful requests / load duration

Made-with: Cursor
diff --git a/.github/workflows/ci-benchmark.yaml b/.github/workflows/ci-benchmark.yaml
@@ -1074,7 +1074,7 @@ jobs:
             | **Avg vLLM Queue Depth** | ${hpa.avg_queue_depth.toFixed(1)} | **${wva.avg_queue_depth.toFixed(1)}** | ${delta(hpa.avg_queue_depth, wva.avg_queue_depth)} |
             | **Avg EPP Queue Depth** | ${(hpa.avg_epp_queue_depth||0).toFixed(1)} | **${(wva.avg_epp_queue_depth||0).toFixed(1)}** | ${delta(hpa.avg_epp_queue_depth||0, wva.avg_epp_queue_depth||0)} |
             | **Avg KV Cache** | ${hpa.avg_kv_cache.toFixed(3)} | ${wva.avg_kv_cache.toFixed(3)} | ${delta(hpa.avg_kv_cache, wva.avg_kv_cache)} |
-            | **TTFT mean** | ${fmtM(hpa.ttft, 1000)}ms | **${fmtM(wva.ttft, 1000)}ms** | ${hpa.ttft && wva.ttft ? delta(hpa.ttft.mean, wva.ttft.mean) : '—'} |
+            | **TTFT mean** | ${fmtM(hpa.ttft, 1000)}s | **${fmtM(wva.ttft, 1000)}s** | ${hpa.ttft && wva.ttft ? delta(hpa.ttft.mean, wva.ttft.mean) : '—'} |
             | **TTFT p50** | ${fmtP(hpa.ttft, 'p50', 1000)}s | **${fmtP(wva.ttft, 'p50', 1000)}s** | — |
             | **TTFT p99** | ${fmtP(hpa.ttft, 'p99', 1000)}s | **${fmtP(wva.ttft, 'p99', 1000)}s** | — |
             | **ITL mean** | ${fmtM(hpa.itl, 1, 2)}ms | **${fmtM(wva.itl, 1, 2)}ms** | ${hpa.itl && wva.itl ? delta(hpa.itl.mean, wva.itl.mean) : '—'} |
diff --git a/test/benchmark/prefill_heavy_benchmark_test.go b/test/benchmark/prefill_heavy_benchmark_test.go
@@ -651,7 +651,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
 		}
 
 		// Extract error counts and achieved RPS from GuideLLM output
-		var errorCount, incompleteCount int
+		var errorCount, incompleteCount, completedCount int
 		var achievedRPS float64
 		if guidellmRaw != nil {
 			var parsed map[string]interface{}
@@ -666,6 +666,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
 								if f, ok := rt["incomplete"].(float64); ok {
 									incompleteCount = int(f)
 								}
+								if f, ok := rt["successful"].(float64); ok {
+									completedCount = int(f)
+								}
 							}
 						}
 						if rateObj, ok := bm["rate"].(map[string]interface{}); ok {
@@ -677,6 +680,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"
 				}
 			}
 		}
+		if achievedRPS == 0 && completedCount > 0 && loadDuration > 0 {
+			achievedRPS = float64(completedCount) / loadDuration
+		}
 
 		result := PrefillResult{
 			AutoscalerType:   autoscalerType,

Original file line number	Diff line number	Diff line change
`@@ -651,7 +651,7 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"`
`651`	`651`	`}`
`652`	`652`
`653`	`653`	`// Extract error counts and achieved RPS from GuideLLM output`
`654`		`- var errorCount, incompleteCount int`
	`654`	`+ var errorCount, incompleteCount, completedCount int`
`655`	`655`	`var achievedRPS float64`
`656`	`656`	`if guidellmRaw != nil {`
`657`	`657`	`var parsed map[string]interface{}`
`@@ -666,6 +666,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"`
`666`	`666`	`if f, ok := rt["incomplete"].(float64); ok {`
`667`	`667`	`incompleteCount = int(f)`
`668`	`668`	`}`
	`669`	`+ if f, ok := rt["successful"].(float64); ok {`
	`670`	`+ completedCount = int(f)`
	`671`	`+ }`
`669`	`672`	`}`
`670`	`673`	`}`
`671`	`674`	`if rateObj, ok := bm["rate"].(map[string]interface{}); ok {`
`@@ -677,6 +680,9 @@ var _ = Describe("Prefill Heavy Workload Benchmark", Label("benchmark", "phase4"`
`677`	`680`	`}`
`678`	`681`	`}`
`679`	`682`	`}`
	`683`	`+ if achievedRPS == 0 && completedCount > 0 && loadDuration > 0 {`
	`684`	`+ achievedRPS = float64(completedCount) / loadDuration`
	`685`	`+ }`
`680`	`686`
`681`	`687`	`result := PrefillResult{`
`682`	`688`	`AutoscalerType: autoscalerType,`