Skip to content

Commit 2d1e5cb

Browse files
authored
Fix analysis bugs (llm-d#260)
Signed-off-by: Nick Masluk <nick@randombytes.net>
1 parent ec13ebc commit 2d1e5cb

File tree

1 file changed

+101
-73
lines changed

1 file changed

+101
-73
lines changed

analysis/analysis.ipynb

Lines changed: 101 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@
186186
" 'Mean_TPOT_ms',\n",
187187
" 'Mean_ITL_ms',\n",
188188
" 'Mean_E2EL_ms',\n",
189+
" 'Is_PD',\n",
190+
" 'Num_GPUs',\n",
191+
" 'Thpt_per_GPU',\n",
192+
" 'Thpt_per_User',\n",
189193
" ])\n",
190194
"\n",
191195
"\n",
@@ -224,12 +228,14 @@
224228
" rp['d_ep'] = None\n",
225229
" if rp['replicas']:\n",
226230
" # We have a standalone setup\n",
231+
" rp['is_pd'] = False\n",
227232
" rp['tp'] = report.scenario.host.accelerator[0].parallelism.tp\n",
228233
" rp['dp'] = report.scenario.host.accelerator[0].parallelism.dp\n",
229234
" rp['pp'] = report.scenario.host.accelerator[0].parallelism.pp\n",
230235
" rp['ep'] = report.scenario.host.accelerator[0].parallelism.ep\n",
231236
" return rp\n",
232237
" # We have a P/D setup\n",
238+
" rp['is_pd'] = True\n",
233239
" for ii, accel in enumerate(report.scenario.host.accelerator):\n",
234240
" if report.scenario.host.type[ii] is schema.HostType.PREFILL and not rp['p_tp']:\n",
235241
" rp['p_tp'] = accel.parallelism.tp\n",
@@ -275,6 +281,7 @@
275281
" \"\"\"\n",
276282
" report = convert.import_benchmark_report(br_file)\n",
277283
" rp = _get_replicas_and_parallelism(report)\n",
284+
"\n",
278285
"    # TODO getting concurrency is specific to each harness, will need\n",
279286
" # a way to capture this universally in the report so we don't have to do\n",
280287
" # extractions like this\n",
@@ -289,6 +296,15 @@
289296
" warn('\"Concurrency\" is not defined, setting to 1, \"Thpt_per_User\" and Pareto plots will also be invalid.')\n",
290297
" concurrency = 1\n",
291298
"\n",
299+
" # Calculated columns\n",
300+
" if rp['is_pd']:\n",
301+
" num_gpus = rp['p_tp']*rp['p_replicas'] + rp['d_tp']*rp['d_replicas']\n",
302+
" else:\n",
303+
" num_gpus = rp['tp']*rp['replicas']\n",
304+
" thpt_per_gpu = report.metrics.throughput.output_tokens_per_sec/num_gpus\n",
305+
" thpt_per_user = report.metrics.throughput.output_tokens_per_sec/concurrency\n",
306+
"\n",
307+
" # Add row to DataFrame\n",
292308
" runs_df.loc[len(runs_df)] = {\n",
293309
" 'Name': _make_name(report),\n",
294310
" # We want the base directory for the sweep, which is two levels up\n",
@@ -314,28 +330,23 @@
314330
" 'Concurrency': concurrency,\n",
315331
" # TODO this may need to be configurable...\n",
316332
" # We need to group by ISL/OSL exactly, so round and convert to int.\n",
317-
" # Round ISL to nearest 10's\n",
318-
" 'ISL': int(round(report.metrics.requests.input_length.mean, -1)),\n",
319-
" 'OSL': int(round(report.metrics.requests.output_length.mean)),\n",
333+
" # Round ISL to nearest 100's\n",
334+
" 'ISL': int(round(report.metrics.requests.input_length.mean, -2)),\n",
335+
" 'OSL': int(round(report.metrics.requests.output_length.mean, -2)),\n",
320336
" 'Duration': report.metrics.time.duration,\n",
321337
" 'Completed': report.metrics.requests.total,\n",
322338
" 'Request_Throughput': report.metrics.throughput.requests_per_sec,\n",
323339
" 'Output_Token_Throughput': report.metrics.throughput.output_tokens_per_sec,\n",
324340
" 'Total_Token_Throughput': report.metrics.throughput.total_tokens_per_sec,\n",
325-
" 'Mean_TTFT_ms': report.metrics.latency.time_to_first_token,\n",
326-
" 'Mean_TPOT_ms': report.metrics.latency,\n",
327-
" 'Mean_ITL_ms': report.metrics.latency,\n",
328-
" 'Mean_E2EL_ms': report.metrics.latency,\n",
341+
" 'Mean_TTFT_ms': report.metrics.latency.time_to_first_token.mean,\n",
342+
" 'Mean_TPOT_ms': report.metrics.latency.time_per_output_token.mean,\n",
343+
" 'Mean_ITL_ms': report.metrics.latency.inter_token_latency.mean,\n",
344+
" 'Mean_E2EL_ms': report.metrics.latency.request_latency.mean,\n",
345+
" 'Is_PD': rp['is_pd'],\n",
346+
" 'Num_GPUs': num_gpus,\n",
347+
" 'Thpt_per_GPU': thpt_per_gpu,\n",
348+
" 'Thpt_per_User': thpt_per_user,\n",
329349
" }\n",
330-
" # Add calculated columns\n",
331-
" if rp['tp']:\n",
332-
" runs_df['Is_PD'] = False\n",
333-
" runs_df['Num_GPUs'] = runs_df['TP']*runs_df['Replicas']\n",
334-
" else:\n",
335-
" runs_df['Is_PD'] = True\n",
336-
" runs_df['Num_GPUs'] = runs_df['P_TP']*runs_df['P_Replicas'] + runs_df['D_TP']*runs_df['D_Replicas']\n",
337-
" runs_df['Thpt_per_GPU'] = runs_df['Output_Token_Throughput']/runs_df['Num_GPUs']\n",
338-
" runs_df['Thpt_per_User'] = runs_df['Output_Token_Throughput']/runs_df['Concurrency']\n",
339350
"\n",
340351
"\n",
341352
"def get_scenarios(runs_df: pandas.core.frame.DataFrame) -> list[tuple[str]]:\n",
@@ -419,7 +430,7 @@
419430
" info(f'Searching for benchmark report files within {sdir}')\n",
420431
" # Find all benchmark report files in the directory\n",
421432
" for br_file in get_benchmark_report_files(sdir):\n",
422-
" info(f'Importing {br_file}')\n",
433+
" #info(f'Importing {br_file}')\n",
423434
" # Import the results and add to the runs DataFrame\n",
424435
" add_benchmark_report_to_df(runs, br_file)"
425436
]
@@ -489,7 +500,8 @@
489500
" (runs['Model'] == model) &\n",
490501
" (runs['GPU'] == gpu) &\n",
491502
" (runs['ISL'] == isl) &\n",
492-
" (runs['OSL'] == osl)][[\n",
503+
" (runs['OSL'] == osl) &\n",
504+
" (runs['Is_PD'] == True) ][[\n",
493505
" 'Model',\n",
494506
" 'GPU',\n",
495507
" 'P_TP',\n",
@@ -509,7 +521,7 @@
509521
" (runs['GPU'] == gpu) &\n",
510522
" (runs['ISL'] == isl) &\n",
511523
" (runs['OSL'] == osl) &\n",
512-
" (runs['Is_PD']) == False][[\n",
524+
" (runs['Is_PD'] == False) ][[\n",
513525
" 'Model',\n",
514526
" 'GPU',\n",
515527
" 'TP',\n",
@@ -532,55 +544,59 @@
532544
"if seg_by_dir:\n",
533545
" configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n",
534546
" configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n",
535-
" for conf in configs_pd:\n",
536-
" config_sets.append((\n",
537-
" None, # Replicas\n",
538-
" None, # TP\n",
539-
" conf[0], # P replicas\n",
540-
" conf[1], # P TP\n",
541-
" conf[2], # D replicas\n",
542-
" conf[3], # D TP\n",
543-
" conf[4], # Directory\n",
544-
" True # Is PD\n",
545-
" ))\n",
546-
" for conf in configs_sa:\n",
547-
" config_sets.append((\n",
548-
" conf[0], # Replicas\n",
549-
" conf[0], # TP\n",
550-
" None, # P replicas\n",
551-
" None, # P TP\n",
552-
" None, # D replicas\n",
553-
" None, # D TP\n",
554-
" conf[2], # Directory\n",
555-
" False # Is PD\n",
556-
" ))\n",
547+
" if show_pd:\n",
548+
" for conf in configs_pd:\n",
549+
" config_sets.append((\n",
550+
" 0, # Replicas\n",
551+
" 0, # TP\n",
552+
" conf[0], # P replicas\n",
553+
" conf[1], # P TP\n",
554+
" conf[2], # D replicas\n",
555+
" conf[3], # D TP\n",
556+
" conf[4], # Directory\n",
557+
" True, # Is PD\n",
558+
" ))\n",
559+
" if show_sa:\n",
560+
" for conf in configs_sa:\n",
561+
" config_sets.append((\n",
562+
" conf[0], # Replicas\n",
563+
" conf[1], # TP\n",
564+
" 0, # P replicas\n",
565+
" 0, # P TP\n",
566+
" 0, # D replicas\n",
567+
" 0, # D TP\n",
568+
" conf[2], # Directory\n",
569+
" False # Is PD\n",
570+
" ))\n",
557571
"else:\n",
558572
" pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n",
559573
" sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n",
560574
" configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n",
561575
" configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n",
562-
" for conf in configs_pd:\n",
563-
" config_sets.append((\n",
564-
" None, # Replicas\n",
565-
" None, # TP\n",
566-
" conf[0], # P replicas\n",
567-
" conf[1], # P TP\n",
568-
" conf[2], # D replicas\n",
569-
" conf[3], # D TP\n",
570-
" None, # Directory\n",
571-
" True # Is PD\n",
572-
" ))\n",
573-
" for conf in configs_sa:\n",
574-
" config_sets.append((\n",
575-
" conf[0], # Replicas\n",
576-
" conf[0], # TP\n",
577-
" None, # P replicas\n",
578-
" None, # P TP\n",
579-
" None, # D replicas\n",
580-
" None, # D TP\n",
581-
" None, # Directory\n",
582-
" False # Is PD\n",
583-
" ))\n",
576+
" if show_pd:\n",
577+
" for conf in configs_pd:\n",
578+
" config_sets.append((\n",
579+
" 0, # Replicas\n",
580+
" 0, # TP\n",
581+
" conf[0], # P replicas\n",
582+
" conf[1], # P TP\n",
583+
" conf[2], # D replicas\n",
584+
" conf[3], # D TP\n",
585+
" 0, # Directory\n",
586+
" True, # Is PD\n",
587+
" ))\n",
588+
" if show_sa:\n",
589+
" for conf in configs_sa:\n",
590+
" config_sets.append((\n",
591+
" conf[0], # Replicas\n",
592+
" conf[1], # TP\n",
593+
" 0, # P replicas\n",
594+
" 0, # P TP\n",
595+
" 0, # D replicas\n",
596+
" 0, # D TP\n",
597+
" 0, # Directory\n",
598+
" False # Is PD\n",
599+
" ))\n",
584600
"\n",
585601
"# Sort so printouts/plots are organized\n",
586602
"config_sets.sort()\n",
@@ -599,6 +615,12 @@
599615
" 'is_pd': conf[7],\n",
600616
" })\n",
601617
"\n",
618+
"if not configs:\n",
619+
" if show_pd:\n",
620+
" print('No P/D configurations for this scenario!')\n",
621+
" if show_sa:\n",
622+
" print('No standalone configurations for this scenario!')\n",
623+
"\n",
602624
"# Sweep through configurations\n",
603625
"for ii, conf in enumerate(configs):\n",
604626
" is_pd = 'P_TP' in conf\n",
@@ -612,7 +634,9 @@
612634
" (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n",
613635
" (pd_runs_selected['D_TP'] == conf['d_tp']) &\n",
614636
" (pd_runs_selected['Directory'] == conf['dir'])\n",
615-
" ].sort_values(by='Concurrency')\n",
637+
" ].drop('Directory', axis=1).sort_values(by='Concurrency')\n",
638+
"\n",
639+
" print(pd_runs_selected.iloc[0]['Directory'])\n",
616640
" else:\n",
617641
" conf_df = pd_runs_selected[\n",
618642
" (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n",
@@ -621,6 +645,7 @@
621645
" (pd_runs_selected['D_TP'] == conf['d_tp'])\n",
622646
" ].sort_values(by='Concurrency')\n",
623647
"\n",
648+
" \n",
624649
" # Print table\n",
625650
" display(conf_df)\n",
626651
" \n",
@@ -641,7 +666,9 @@
641666
" (sa_runs_selected['Replicas'] == conf['rep']) &\n",
642667
" (sa_runs_selected['TP'] == conf['tp']) &\n",
643668
" (sa_runs_selected['Directory'] == conf['dir'])\n",
644-
" ].sort_values(by='Concurrency')\n",
669+
" ].drop('Directory', axis=1).sort_values(by='Concurrency')\n",
670+
"\n",
671+
" print(sa_runs_selected.iloc[0]['Directory'])\n",
645672
" else:\n",
646673
" conf_df = sa_runs_selected[\n",
647674
" (sa_runs_selected['Replicas'] == conf['rep']) &\n",
@@ -662,19 +689,20 @@
662689
" list(conf_df.Thpt_per_GPU)[jj]+sa_runs_selected['Thpt_per_GPU'].max()*0.02,\n",
663690
" str(val), ha='center', color=colors[ii%len(colors)])\n",
664691
"\n",
665-
"plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n",
666-
"plt.xlabel('Tok/s/User', fontsize='16')\n",
667-
"plt.ylabel('Tok/s/GPU', fontsize='16')\n",
668-
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
669-
"plt.grid(True, linewidth=1, ls='--', color='gray')\n",
670-
"plt.axis([0, None, 0, None])\n",
671-
"plt.show()\n"
692+
"if configs:\n",
693+
" plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n",
694+
" plt.xlabel('Tok/s/User', fontsize='16')\n",
695+
" plt.ylabel('Tok/s/GPU', fontsize='16')\n",
696+
" plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
697+
" plt.grid(True, linewidth=1, ls='--', color='gray')\n",
698+
" plt.axis([0, None, 0, None])\n",
699+
" plt.show()\n"
672700
]
673701
},
674702
{
675703
"cell_type": "code",
676704
"execution_count": null,
677-
"id": "d5299c3c-65cb-48e3-b381-6fe8b89a26a0",
705+
"id": "e1f5bfca-b25a-46d3-84e7-b4fd423c50f6",
678706
"metadata": {},
679707
"outputs": [],
680708
"source": []

0 commit comments

Comments
 (0)