1616EP = "EP"
1717DP_ATTENTION = "DP Attention"
1818CONC = "Conc"
19- TTFT = "TTFT (ms)"
19+ TTFT_MEDIAN = "TTFT Median (ms)"
20+ TTFT_P90 = "TTFT P90 (ms)"
21+ TTFT_P99 = "TTFT P99 (ms)"
22+ TTFT_P999 = "TTFT P99.9 (ms)"
2023TPOT = "TPOT (ms)"
21- INTERACTIVITY = "Interactivity (tok/s/user)"
22- E2EL = "E2EL (s)"
24+ INTVTY_MEDIAN = "Intvty Median (tok/s/user)"
25+ INTVTY_P90 = "Intvty P90 (tok/s/user)"
26+ INTVTY_P99 = "Intvty P99 (tok/s/user)"
27+ INTVTY_P999 = "Intvty P99.9 (tok/s/user)"
28+ E2EL_MEDIAN = "E2EL Median (s)"
29+ E2EL_P90 = "E2EL P90 (s)"
30+ E2EL_P99 = "E2EL P99 (s)"
31+ E2EL_P999 = "E2EL P99.9 (s)"
2332TPUT_PER_GPU = "TPUT per GPU"
2433OUTPUT_TPUT_PER_GPU = "Output TPUT per GPU"
2534INPUT_TPUT_PER_GPU = "Input TPUT per GPU"
@@ -74,7 +83,12 @@ def main():
7483
7584 single_node_headers = [
7685 MODEL , SERVED_MODEL , HARDWARE , FRAMEWORK , PRECISION , ISL , OSL , TP , EP , DP_ATTENTION ,
77- CONC , TTFT , TPOT , INTERACTIVITY , E2EL , TPUT_PER_GPU , OUTPUT_TPUT_PER_GPU , INPUT_TPUT_PER_GPU
86+ CONC ,
87+ TTFT_MEDIAN , TTFT_P90 , TTFT_P99 , TTFT_P999 ,
88+ TPOT ,
89+ INTVTY_MEDIAN , INTVTY_P90 , INTVTY_P99 , INTVTY_P999 ,
90+ E2EL_MEDIAN , E2EL_P90 , E2EL_P99 , E2EL_P999 ,
91+ TPUT_PER_GPU , OUTPUT_TPUT_PER_GPU , INPUT_TPUT_PER_GPU
7892 ]
7993
8094 single_node_rows = [
@@ -91,9 +105,18 @@ def main():
91105 r ['dp_attention' ],
92106 r ['conc' ],
93107 f"{ r ['median_ttft' ] * 1000 :.4f} " ,
108+ f"{ r .get ('p90_ttft' , 0 ) * 1000 :.4f} " ,
109+ f"{ r .get ('p99_ttft' , 0 ) * 1000 :.4f} " ,
110+ f"{ r .get ('p99.9_ttft' , 0 ) * 1000 :.4f} " ,
94111 f"{ r ['median_tpot' ] * 1000 :.4f} " ,
95- f"{ r ['median_intvty' ]:.4f} " ,
96- f"{ r ['median_e2el' ]:.4f} " ,
112+ f"{ r .get ('median_intvty' , 0 ):.4f} " ,
113+ f"{ r .get ('p90_intvty' , 0 ):.4f} " ,
114+ f"{ r .get ('p99_intvty' , 0 ):.4f} " ,
115+ f"{ r .get ('p99.9_intvty' , 0 ):.4f} " ,
116+ f"{ r .get ('median_e2el' , 0 ):.4f} " ,
117+ f"{ r .get ('p90_e2el' , 0 ):.4f} " ,
118+ f"{ r .get ('p99_e2el' , 0 ):.4f} " ,
119+ f"{ r .get ('p99.9_e2el' , 0 ):.4f} " ,
97120 f"{ r ['tput_per_gpu' ]:.4f} " ,
98121 f"{ r ['output_tput_per_gpu' ]:.4f} " ,
99122 f"{ r ['input_tput_per_gpu' ]:.4f} " ,
@@ -114,7 +137,12 @@ def main():
114137 MODEL , SERVED_MODEL , HARDWARE , FRAMEWORK , PRECISION , ISL , OSL ,
115138 PREFILL_TP , PREFILL_EP , PREFILL_DP_ATTN , PREFILL_WORKERS , PREFILL_GPUS ,
116139 DECODE_TP , DECODE_EP , DECODE_DP_ATTN , DECODE_WORKERS , DECODE_GPUS ,
117- CONC , TTFT , TPOT , INTERACTIVITY , E2EL , TPUT_PER_GPU , OUTPUT_TPUT_PER_GPU , INPUT_TPUT_PER_GPU
140+ CONC ,
141+ TTFT_MEDIAN , TTFT_P90 , TTFT_P99 , TTFT_P999 ,
142+ TPOT ,
143+ INTVTY_MEDIAN , INTVTY_P90 , INTVTY_P99 , INTVTY_P999 ,
144+ E2EL_MEDIAN , E2EL_P90 , E2EL_P99 , E2EL_P999 ,
145+ TPUT_PER_GPU , OUTPUT_TPUT_PER_GPU , INPUT_TPUT_PER_GPU
118146 ]
119147
120148 multinode_rows = [
@@ -138,9 +166,18 @@ def main():
138166 r ['num_decode_gpu' ],
139167 r ['conc' ],
140168 f"{ r ['median_ttft' ] * 1000 :.4f} " ,
169+ f"{ r .get ('p90_ttft' , 0 ) * 1000 :.4f} " ,
170+ f"{ r .get ('p99_ttft' , 0 ) * 1000 :.4f} " ,
171+ f"{ r .get ('p99.9_ttft' , 0 ) * 1000 :.4f} " ,
141172 f"{ r ['median_tpot' ] * 1000 :.4f} " ,
142- f"{ r ['median_intvty' ]:.4f} " ,
143- f"{ r ['median_e2el' ]:.4f} " ,
173+ f"{ r .get ('median_intvty' , 0 ):.4f} " ,
174+ f"{ r .get ('p90_intvty' , 0 ):.4f} " ,
175+ f"{ r .get ('p99_intvty' , 0 ):.4f} " ,
176+ f"{ r .get ('p99.9_intvty' , 0 ):.4f} " ,
177+ f"{ r .get ('median_e2el' , 0 ):.4f} " ,
178+ f"{ r .get ('p90_e2el' , 0 ):.4f} " ,
179+ f"{ r .get ('p99_e2el' , 0 ):.4f} " ,
180+ f"{ r .get ('p99.9_e2el' , 0 ):.4f} " ,
144181 f"{ r ['tput_per_gpu' ]:.4f} " ,
145182 f"{ r ['output_tput_per_gpu' ]:.4f} " ,
146183 f"{ r ['input_tput_per_gpu' ]:.4f} " ,
0 commit comments