|
186 | 186 | " 'Mean_TPOT_ms',\n", |
187 | 187 | " 'Mean_ITL_ms',\n", |
188 | 188 | " 'Mean_E2EL_ms',\n", |
| 189 | + " 'Is_PD',\n", |
| 190 | + " 'Num_GPUs',\n", |
| 191 | + " 'Thpt_per_GPU',\n", |
| 192 | + " 'Thpt_per_User',\n", |
189 | 193 | " ])\n", |
190 | 194 | "\n", |
191 | 195 | "\n", |
|
224 | 228 | " rp['d_ep'] = None\n", |
225 | 229 | " if rp['replicas']:\n", |
226 | 230 | " # We have a standalone setup\n", |
| 231 | + " rp['is_pd'] = False\n", |
227 | 232 | " rp['tp'] = report.scenario.host.accelerator[0].parallelism.tp\n", |
228 | 233 | " rp['dp'] = report.scenario.host.accelerator[0].parallelism.dp\n", |
229 | 234 | " rp['pp'] = report.scenario.host.accelerator[0].parallelism.pp\n", |
230 | 235 | " rp['ep'] = report.scenario.host.accelerator[0].parallelism.ep\n", |
231 | 236 | " return rp\n", |
232 | 237 | " # We have a P/D setup\n", |
| 238 | + " rp['is_pd'] = True\n", |
233 | 239 | " for ii, accel in enumerate(report.scenario.host.accelerator):\n", |
234 | 240 | " if report.scenario.host.type[ii] is schema.HostType.PREFILL and not rp['p_tp']:\n", |
235 | 241 | " rp['p_tp'] = accel.parallelism.tp\n", |
|
275 | 281 | " \"\"\"\n", |
276 | 282 | " report = convert.import_benchmark_report(br_file)\n", |
277 | 283 | " rp = _get_replicas_and_parallelism(report)\n", |
| 284 | + "\n", |
278 | 285 | " # TODO getting concurrency is specific to each harness, will need\n", |
279 | 286 | " # a way to capture this universally in the report so we don't have to do\n", |
280 | 287 | " # extractions like this\n", |
|
289 | 296 | " warn('\"Concurrency\" is not defined, setting to 1, \"Thpt_per_User\" and Pareto plots will also be invalid.')\n", |
290 | 297 | " concurrency = 1\n", |
291 | 298 | "\n", |
| 299 | + " # Calculated columns\n", |
| 300 | + " if rp['is_pd']:\n", |
| 301 | + " num_gpus = rp['p_tp']*rp['p_replicas'] + rp['d_tp']*rp['d_replicas']\n", |
| 302 | + " else:\n", |
| 303 | + " num_gpus = rp['tp']*rp['replicas']\n", |
| 304 | + " thpt_per_gpu = report.metrics.throughput.output_tokens_per_sec/num_gpus\n", |
| 305 | + " thpt_per_user = report.metrics.throughput.output_tokens_per_sec/concurrency\n", |
| 306 | + "\n", |
| 307 | + " # Add row to DataFrame\n", |
292 | 308 | " runs_df.loc[len(runs_df)] = {\n", |
293 | 309 | " 'Name': _make_name(report),\n", |
294 | 310 | " # We want the base directory for the sweep, which is two levels up\n", |
|
314 | 330 | " 'Concurrency': concurrency,\n", |
315 | 331 | " # TODO this may need to be configurable...\n", |
316 | 332 | " # We need to group by ISL/OSL exactly, so round and convert to int.\n", |
317 | | - " # Round ISL to nearest 10's\n", |
318 | | - " 'ISL': int(round(report.metrics.requests.input_length.mean, -1)),\n", |
319 | | - " 'OSL': int(round(report.metrics.requests.output_length.mean)),\n", |
| 333 | + " # Round ISL and OSL to nearest 100's\n", |
| 334 | + " 'ISL': int(round(report.metrics.requests.input_length.mean, -2)),\n", |
| 335 | + " 'OSL': int(round(report.metrics.requests.output_length.mean, -2)),\n", |
320 | 336 | " 'Duration': report.metrics.time.duration,\n", |
321 | 337 | " 'Completed': report.metrics.requests.total,\n", |
322 | 338 | " 'Request_Throughput': report.metrics.throughput.requests_per_sec,\n", |
323 | 339 | " 'Output_Token_Throughput': report.metrics.throughput.output_tokens_per_sec,\n", |
324 | 340 | " 'Total_Token_Throughput': report.metrics.throughput.total_tokens_per_sec,\n", |
325 | | - " 'Mean_TTFT_ms': report.metrics.latency.time_to_first_token,\n", |
326 | | - " 'Mean_TPOT_ms': report.metrics.latency,\n", |
327 | | - " 'Mean_ITL_ms': report.metrics.latency,\n", |
328 | | - " 'Mean_E2EL_ms': report.metrics.latency,\n", |
| 341 | + " 'Mean_TTFT_ms': report.metrics.latency.time_to_first_token.mean,\n", |
| 342 | + " 'Mean_TPOT_ms': report.metrics.latency.time_per_output_token.mean,\n", |
| 343 | + " 'Mean_ITL_ms': report.metrics.latency.inter_token_latency.mean,\n", |
| 344 | + " 'Mean_E2EL_ms': report.metrics.latency.request_latency.mean,\n", |
| 345 | + " 'Is_PD': rp['is_pd'],\n", |
| 346 | + " 'Num_GPUs': num_gpus,\n", |
| 347 | + " 'Thpt_per_GPU': thpt_per_gpu,\n", |
| 348 | + " 'Thpt_per_User': thpt_per_user,\n", |
329 | 349 | " }\n", |
330 | | - " # Add calculated columns\n", |
331 | | - " if rp['tp']:\n", |
332 | | - " runs_df['Is_PD'] = False\n", |
333 | | - " runs_df['Num_GPUs'] = runs_df['TP']*runs_df['Replicas']\n", |
334 | | - " else:\n", |
335 | | - " runs_df['Is_PD'] = True\n", |
336 | | - " runs_df['Num_GPUs'] = runs_df['P_TP']*runs_df['P_Replicas'] + runs_df['D_TP']*runs_df['D_Replicas']\n", |
337 | | - " runs_df['Thpt_per_GPU'] = runs_df['Output_Token_Throughput']/runs_df['Num_GPUs']\n", |
338 | | - " runs_df['Thpt_per_User'] = runs_df['Output_Token_Throughput']/runs_df['Concurrency']\n", |
339 | 350 | "\n", |
340 | 351 | "\n", |
341 | 352 | "def get_scenarios(runs_df: pandas.core.frame.DataFrame) -> list[tuple[str]]:\n", |
|
419 | 430 | " info(f'Searching for benchmark report files within {sdir}')\n", |
420 | 431 | " # Find all benchmark report files in the directory\n", |
421 | 432 | " for br_file in get_benchmark_report_files(sdir):\n", |
422 | | - " info(f'Importing {br_file}')\n", |
| 433 | + " #info(f'Importing {br_file}')\n", |
423 | 434 | " # Import the results and add to the runs DataFrame\n", |
424 | 435 | " add_benchmark_report_to_df(runs, br_file)" |
425 | 436 | ] |
|
489 | 500 | " (runs['Model'] == model) &\n", |
490 | 501 | " (runs['GPU'] == gpu) &\n", |
491 | 502 | " (runs['ISL'] == isl) &\n", |
492 | | - " (runs['OSL'] == osl)][[\n", |
| 503 | + " (runs['OSL'] == osl) &\n", |
| 504 | + " (runs['Is_PD'] == True) ][[\n", |
493 | 505 | " 'Model',\n", |
494 | 506 | " 'GPU',\n", |
495 | 507 | " 'P_TP',\n", |
|
509 | 521 | " (runs['GPU'] == gpu) &\n", |
510 | 522 | " (runs['ISL'] == isl) &\n", |
511 | 523 | " (runs['OSL'] == osl) &\n", |
512 | | - " (runs['Is_PD']) == False][[\n", |
| 524 | + " (runs['Is_PD'] == False) ][[\n", |
513 | 525 | " 'Model',\n", |
514 | 526 | " 'GPU',\n", |
515 | 527 | " 'TP',\n", |
|
532 | 544 | "if seg_by_dir:\n", |
533 | 545 | " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n", |
534 | 546 | " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n", |
535 | | - " for conf in configs_pd:\n", |
536 | | - " config_sets.append((\n", |
537 | | - " None, # Replicas\n", |
538 | | - " None, # TP\n", |
539 | | - " conf[0], # P replicas\n", |
540 | | - " conf[1], # P TP\n", |
541 | | - " conf[2], # D replicas\n", |
542 | | - " conf[3], # D TP\n", |
543 | | - " conf[4], # Directory\n", |
544 | | - " True # Is PD\n", |
545 | | - " ))\n", |
546 | | - " for conf in configs_sa:\n", |
547 | | - " config_sets.append((\n", |
548 | | - " conf[0], # Replicas\n", |
549 | | - " conf[0], # TP\n", |
550 | | - " None, # P replicas\n", |
551 | | - " None, # P TP\n", |
552 | | - " None, # D replicas\n", |
553 | | - " None, # D TP\n", |
554 | | - " conf[2], # Directory\n", |
555 | | - " False # Is PD\n", |
556 | | - " ))\n", |
| 547 | + " if show_pd:\n", |
| 548 | + " for conf in configs_pd:\n", |
| 549 | + " config_sets.append((\n", |
| 550 | + " 0, # Replicas\n", |
| 551 | + " 0, # TP\n", |
| 552 | + " conf[0], # P replicas\n", |
| 553 | + " conf[1], # P TP\n", |
| 554 | + " conf[2], # D replicas\n", |
| 555 | + " conf[3], # D TP\n", |
| 556 | + " conf[4], # Directory\n", |
| 557 | + " True, # Is PD\n", |
| 558 | + " ))\n", |
| 559 | + " if show_sa:\n", |
| 560 | + " for conf in configs_sa:\n", |
| 561 | + " config_sets.append((\n", |
| 562 | + " conf[0], # Replicas\n", |
| 563 | + " conf[1], # TP\n", |
| 564 | + " 0, # P replicas\n", |
| 565 | + " 0, # P TP\n", |
| 566 | + " 0, # D replicas\n", |
| 567 | + " 0, # D TP\n", |
| 568 | + " conf[2], # Directory\n", |
| 569 | + " False # Is PD\n", |
| 570 | + " ))\n", |
557 | 571 | "else:\n", |
558 | 572 | " pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n", |
559 | 573 | " sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n", |
560 | 574 | " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n", |
561 | 575 | " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n", |
562 | | - " for conf in configs_pd:\n", |
563 | | - " config_sets.append((\n", |
564 | | - " None, # Replicas\n", |
565 | | - " None, # TP\n", |
566 | | - " conf[0], # P replicas\n", |
567 | | - " conf[1], # P TP\n", |
568 | | - " conf[2], # D replicas\n", |
569 | | - " conf[3], # D TP\n", |
570 | | - " None, # Directory\n", |
571 | | - " True # Is PD\n", |
572 | | - " ))\n", |
573 | | - " for conf in configs_sa:\n", |
574 | | - " config_sets.append((\n", |
575 | | - " conf[0], # Replicas\n", |
576 | | - " conf[0], # TP\n", |
577 | | - " None, # P replicas\n", |
578 | | - " None, # P TP\n", |
579 | | - " None, # D replicas\n", |
580 | | - " None, # D TP\n", |
581 | | - " None, # Directory\n", |
582 | | - " False # Is PD\n", |
583 | | - " ))\n", |
| 576 | + " if show_pd:\n", |
| 577 | + " for conf in configs_pd:\n", |
| 578 | + " config_sets.append((\n", |
| 579 | + " 0, # Replicas\n", |
| 580 | + " 0, # TP\n", |
| 581 | + " conf[0], # P replicas\n", |
| 582 | + " conf[1], # P TP\n", |
| 583 | + " conf[2], # D replicas\n", |
| 584 | + " conf[3], # D TP\n", |
| 585 | + " 0, # Directory\n", |
| 586 | + " True, # Is PD\n", |
| 587 | + " ))\n", |
| 588 | + " if show_sa:\n", |
| 589 | + " for conf in configs_sa:\n", |
| 590 | + " config_sets.append((\n", |
| 591 | + " conf[0], # Replicas\n", |
| 592 | + " conf[1], # TP\n", |
| 593 | + " 0, # P replicas\n", |
| 594 | + " 0, # P TP\n", |
| 595 | + " 0, # D replicas\n", |
| 596 | + " 0, # D TP\n", |
| 597 | + " 0, # Directory\n", |
| 598 | + " False # Is PD\n", |
| 599 | + " ))\n", |
584 | 600 | "\n", |
585 | 601 | "# Sort so printouts/plots are organized\n", |
586 | 602 | "config_sets.sort()\n", |
|
599 | 615 | " 'is_pd': conf[7],\n", |
600 | 616 | " })\n", |
601 | 617 | "\n", |
| 618 | + "if not configs:\n", |
| 619 | + " if show_pd:\n", |
| 620 | + " print('No P/D configurations for this scenario!')\n", |
| 621 | + " if show_sa:\n", |
| 622 | + " print('No standalone configurations for this scenario!')\n", |
| 623 | + "\n", |
602 | 624 | "# Sweep through configurations\n", |
603 | 625 | "for ii, conf in enumerate(configs):\n", |
604 | 626 | " is_pd = 'P_TP' in conf\n", |
|
612 | 634 | " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", |
613 | 635 | " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", |
614 | 636 | " (pd_runs_selected['Directory'] == conf['dir'])\n", |
615 | | - " ].sort_values(by='Concurrency')\n", |
| 637 | + " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", |
| 638 | + "\n", |
| 639 | + " print(pd_runs_selected.iloc[0]['Directory'])\n", |
616 | 640 | " else:\n", |
617 | 641 | " conf_df = pd_runs_selected[\n", |
618 | 642 | " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", |
|
621 | 645 | " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", |
622 | 646 | " ].sort_values(by='Concurrency')\n", |
623 | 647 | "\n", |
| 648 | + " \n", |
624 | 649 | " # Print table\n", |
625 | 650 | " display(conf_df)\n", |
626 | 651 | " \n", |
|
641 | 666 | " (sa_runs_selected['Replicas'] == conf['rep']) &\n", |
642 | 667 | " (sa_runs_selected['TP'] == conf['tp']) &\n", |
643 | 668 | " (sa_runs_selected['Directory'] == conf['dir'])\n", |
644 | | - " ].sort_values(by='Concurrency')\n", |
| 669 | + " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", |
| 670 | + "\n", |
| 671 | + " print(sa_runs_selected.iloc[0]['Directory'])\n", |
645 | 672 | " else:\n", |
646 | 673 | " conf_df = sa_runs_selected[\n", |
647 | 674 | " (sa_runs_selected['Replicas'] == conf['rep']) &\n", |
|
662 | 689 | " list(conf_df.Thpt_per_GPU)[jj]+sa_runs_selected['Thpt_per_GPU'].max()*0.02,\n", |
663 | 690 | " str(val), ha='center', color=colors[ii%len(colors)])\n", |
664 | 691 | "\n", |
665 | | - "plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", |
666 | | - "plt.xlabel('Tok/s/User', fontsize='16')\n", |
667 | | - "plt.ylabel('Tok/s/GPU', fontsize='16')\n", |
668 | | - "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", |
669 | | - "plt.grid(True, linewidth=1, ls='--', color='gray')\n", |
670 | | - "plt.axis([0, None, 0, None])\n", |
671 | | - "plt.show()\n" |
| 692 | + "if configs:\n", |
| 693 | + " plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", |
| 694 | + " plt.xlabel('Tok/s/User', fontsize='16')\n", |
| 695 | + " plt.ylabel('Tok/s/GPU', fontsize='16')\n", |
| 696 | + " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", |
| 697 | + " plt.grid(True, linewidth=1, ls='--', color='gray')\n", |
| 698 | + " plt.axis([0, None, 0, None])\n", |
| 699 | + " plt.show()\n" |
672 | 700 | ] |
673 | 701 | }, |
674 | 702 | { |
675 | 703 | "cell_type": "code", |
676 | 704 | "execution_count": null, |
677 | | - "id": "d5299c3c-65cb-48e3-b381-6fe8b89a26a0", |
| 705 | + "id": "e1f5bfca-b25a-46d3-84e7-b4fd423c50f6", |
678 | 706 | "metadata": {}, |
679 | 707 | "outputs": [], |
680 | 708 | "source": [] |
|
0 commit comments