|
20 | 20 | TTFT_P90 = "TTFT P90 (ms)" |
21 | 21 | TTFT_P99 = "TTFT P99 (ms)" |
22 | 22 | TTFT_P999 = "TTFT P99.9 (ms)" |
23 | | -TPOT = "TPOT (ms)" |
| 23 | +TPOT_MEDIAN = "TPOT Median (ms)" |
24 | 24 | INTVTY_MEDIAN = "Intvty Median (tok/s/user)" |
25 | | -INTVTY_P90 = "Intvty P90 (tok/s/user)" |
26 | | -INTVTY_P99 = "Intvty P99 (tok/s/user)" |
27 | | -INTVTY_P999 = "Intvty P99.9 (tok/s/user)" |
| 25 | +INTVTY_AT_P90_TPOT = "Intvty at P90 TPOT (tok/s/user)" |
| 26 | +INTVTY_AT_P99_TPOT = "Intvty at P99 TPOT (tok/s/user)" |
| 27 | +INTVTY_AT_P999_TPOT = "Intvty at P99.9 TPOT (tok/s/user)" |
28 | 28 | E2EL_MEDIAN = "E2EL Median (s)" |
29 | 29 | E2EL_P90 = "E2EL P90 (s)" |
30 | 30 | E2EL_P99 = "E2EL P99 (s)" |
@@ -85,8 +85,8 @@ def main(): |
85 | 85 | MODEL, SERVED_MODEL, HARDWARE, FRAMEWORK, PRECISION, ISL, OSL, TP, EP, DP_ATTENTION, |
86 | 86 | CONC, |
87 | 87 | TTFT_MEDIAN, TTFT_P90, TTFT_P99, TTFT_P999, |
88 | | - TPOT, |
89 | | - INTVTY_MEDIAN, INTVTY_P90, INTVTY_P99, INTVTY_P999, |
| 88 | + TPOT_MEDIAN, |
| 89 | + INTVTY_MEDIAN, INTVTY_AT_P90_TPOT, INTVTY_AT_P99_TPOT, INTVTY_AT_P999_TPOT, |
90 | 90 | E2EL_MEDIAN, E2EL_P90, E2EL_P99, E2EL_P999, |
91 | 91 | TPUT_PER_GPU, OUTPUT_TPUT_PER_GPU, INPUT_TPUT_PER_GPU |
92 | 92 | ] |
@@ -139,8 +139,8 @@ def main(): |
139 | 139 | DECODE_TP, DECODE_EP, DECODE_DP_ATTN, DECODE_WORKERS, DECODE_GPUS, |
140 | 140 | CONC, |
141 | 141 | TTFT_MEDIAN, TTFT_P90, TTFT_P99, TTFT_P999, |
142 | | - TPOT, |
143 | | - INTVTY_MEDIAN, INTVTY_P90, INTVTY_P99, INTVTY_P999, |
| 142 | + TPOT_MEDIAN, |
| 143 | + INTVTY_MEDIAN, INTVTY_AT_P90_TPOT, INTVTY_AT_P99_TPOT, INTVTY_AT_P999_TPOT, |
144 | 144 | E2EL_MEDIAN, E2EL_P90, E2EL_P99, E2EL_P999, |
145 | 145 | TPUT_PER_GPU, OUTPUT_TPUT_PER_GPU, INPUT_TPUT_PER_GPU |
146 | 146 | ] |
|
0 commit comments