Fix analysis bugs (llm-d#260)

namasl · web-flow · commit 2d1e5cb403ae · 2025-08-13T18:40:35.000-04:00
Signed-off-by: Nick Masluk <nick@randombytes.net>
diff --git a/analysis/analysis.ipynb b/analysis/analysis.ipynb
@@ -186,6 +186,10 @@
     "        'Mean_TPOT_ms',\n",
     "        'Mean_ITL_ms',\n",
     "        'Mean_E2EL_ms',\n",
+    "        'Is_PD',\n",
+    "        'Num_GPUs',\n",
+    "        'Thpt_per_GPU',\n",
+    "        'Thpt_per_User',\n",
     "    ])\n",
     "\n",
     "\n",
@@ -224,12 +228,14 @@
     "    rp['d_ep'] = None\n",
     "    if rp['replicas']:\n",
     "        # We have a standalone setup\n",
+    "        rp['is_pd'] = False\n",
     "        rp['tp'] = report.scenario.host.accelerator[0].parallelism.tp\n",
     "        rp['dp'] = report.scenario.host.accelerator[0].parallelism.dp\n",
     "        rp['pp'] = report.scenario.host.accelerator[0].parallelism.pp\n",
     "        rp['ep'] = report.scenario.host.accelerator[0].parallelism.ep\n",
     "        return rp\n",
     "    # We have a P/D setup\n",
+    "    rp['is_pd'] = True\n",
     "    for ii, accel in enumerate(report.scenario.host.accelerator):\n",
     "        if report.scenario.host.type[ii] is schema.HostType.PREFILL and not rp['p_tp']:\n",
     "            rp['p_tp'] = accel.parallelism.tp\n",
@@ -275,6 +281,7 @@
     "    \"\"\"\n",
     "    report = convert.import_benchmark_report(br_file)\n",
     "    rp = _get_replicas_and_parallelism(report)\n",
+    "\n",
     "    # TODO getting concurrency is speciffic to each harness, will need\n",
     "    # a way to capture this universally in the report so we don't have to do\n",
     "    # extractions like this\n",
@@ -289,6 +296,15 @@
     "        warn('\"Concurrency\" is not defined, setting to 1, \"Thpt_per_User\" and Pareto plots will also be invalid.')\n",
     "        concurrency = 1\n",
     "\n",
+    "    # Calculated columns\n",
+    "    if rp['is_pd']:\n",
+    "        num_gpus = rp['p_tp']*rp['p_replicas'] + rp['d_tp']*rp['d_replicas']\n",
+    "    else:\n",
+    "        num_gpus = rp['tp']*rp['replicas']\n",
+    "    thpt_per_gpu = report.metrics.throughput.output_tokens_per_sec/num_gpus\n",
+    "    thpt_per_user = report.metrics.throughput.output_tokens_per_sec/concurrency\n",
+    "\n",
+    "    # Add row to DataFrame\n",
     "    runs_df.loc[len(runs_df)] = {\n",
     "        'Name': _make_name(report),\n",
     "        # We want the base directory for the sweep, which is two levels up\n",
@@ -314,28 +330,23 @@
     "        'Concurrency': concurrency,\n",
     "        # TODO this may need to be configurable...\n",
     "        # We need to group by ISL/OSL exactly, so round and convert to int.\n",
-    "        # Round ISL to nearest 10's\n",
-    "        'ISL': int(round(report.metrics.requests.input_length.mean, -1)),\n",
-    "        'OSL': int(round(report.metrics.requests.output_length.mean)),\n",
+    "        # Round ISL to nearest 100's\n",
+    "        'ISL': int(round(report.metrics.requests.input_length.mean, -2)),\n",
+    "        'OSL': int(round(report.metrics.requests.output_length.mean, -2)),\n",
     "        'Duration': report.metrics.time.duration,\n",
     "        'Completed': report.metrics.requests.total,\n",
     "        'Request_Throughput': report.metrics.throughput.requests_per_sec,\n",
     "        'Output_Token_Throughput': report.metrics.throughput.output_tokens_per_sec,\n",
     "        'Total_Token_Throughput': report.metrics.throughput.total_tokens_per_sec,\n",
-    "        'Mean_TTFT_ms': report.metrics.latency.time_to_first_token,\n",
-    "        'Mean_TPOT_ms': report.metrics.latency,\n",
-    "        'Mean_ITL_ms': report.metrics.latency,\n",
-    "        'Mean_E2EL_ms': report.metrics.latency,\n",
+    "        'Mean_TTFT_ms': report.metrics.latency.time_to_first_token.mean,\n",
+    "        'Mean_TPOT_ms': report.metrics.latency.time_per_output_token.mean,\n",
+    "        'Mean_ITL_ms': report.metrics.latency.inter_token_latency.mean,\n",
+    "        'Mean_E2EL_ms': report.metrics.latency.request_latency.mean,\n",
+    "        'Is_PD': rp['is_pd'],\n",
+    "        'Num_GPUs': num_gpus,\n",
+    "        'Thpt_per_GPU': thpt_per_gpu,\n",
+    "        'Thpt_per_User': thpt_per_user,\n",
     "    }\n",
-    "    # Add calculated columns\n",
-    "    if rp['tp']:\n",
-    "        runs_df['Is_PD'] = False\n",
-    "        runs_df['Num_GPUs'] = runs_df['TP']*runs_df['Replicas']\n",
-    "    else:\n",
-    "        runs_df['Is_PD'] = True\n",
-    "        runs_df['Num_GPUs'] = runs_df['P_TP']*runs_df['P_Replicas'] + runs_df['D_TP']*runs_df['D_Replicas']\n",
-    "    runs_df['Thpt_per_GPU'] = runs_df['Output_Token_Throughput']/runs_df['Num_GPUs']\n",
-    "    runs_df['Thpt_per_User'] = runs_df['Output_Token_Throughput']/runs_df['Concurrency']\n",
     "\n",
     "\n",
     "def get_scenarios(runs_df: pandas.core.frame.DataFrame) -> list[tuple[str]]:\n",
@@ -419,7 +430,7 @@
     "    info(f'Searching for benchmark report files within {sdir}')\n",
     "    # Find all benchmark report files in the directory\n",
     "    for br_file in get_benchmark_report_files(sdir):\n",
-    "        info(f'Importing {br_file}')\n",
+    "        #info(f'Importing {br_file}')\n",
     "        # Import the results and add to the runs DataFrame\n",
     "        add_benchmark_report_to_df(runs, br_file)"
    ]
@@ -489,7 +500,8 @@
     "    (runs['Model'] == model) &\n",
     "    (runs['GPU'] == gpu) &\n",
     "    (runs['ISL'] == isl) &\n",
-    "    (runs['OSL'] == osl)][[\n",
+    "    (runs['OSL'] == osl) &\n",
+    "    (runs['Is_PD'] == True) ][[\n",
     "    'Model',\n",
     "    'GPU',\n",
     "    'P_TP',\n",
@@ -509,7 +521,7 @@
     "    (runs['GPU'] == gpu) &\n",
     "    (runs['ISL'] == isl) &\n",
     "    (runs['OSL'] == osl) &\n",
-    "    (runs['Is_PD']) == False][[\n",
+    "    (runs['Is_PD'] == False) ][[\n",
     "    'Model',\n",
     "    'GPU',\n",
     "    'TP',\n",
@@ -532,55 +544,59 @@
     "if seg_by_dir:\n",
     "    configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n",
     "    configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n",
-    "    for conf in configs_pd:\n",
-    "        config_sets.append((\n",
-    "            None,    # Replicas\n",
-    "            None,    # TP\n",
-    "            conf[0], # P replicas\n",
-    "            conf[1], # P TP\n",
-    "            conf[2], # D replicas\n",
-    "            conf[3], # D TP\n",
-    "            conf[4], # Directory\n",
-    "            True     # Is PD\n",
-    "        ))\n",
-    "    for conf in configs_sa:\n",
-    "        config_sets.append((\n",
-    "            conf[0], # Replicas\n",
-    "            conf[0], # TP\n",
-    "            None,    # P replicas\n",
-    "            None,    # P TP\n",
-    "            None,    # D replicas\n",
-    "            None,    # D TP\n",
-    "            conf[2], # Directory\n",
-    "            False    # Is PD\n",
-    "        ))\n",
+    "    if show_pd:\n",
+    "        for conf in configs_pd:\n",
+    "            config_sets.append((\n",
+    "                0,       # Replicas\n",
+    "                0,       # TP\n",
+    "                conf[0], # P replicas\n",
+    "                conf[1], # P TP\n",
+    "                conf[2], # D replicas\n",
+    "                conf[3], # D TP\n",
+    "                conf[4], # Directory\n",
+    "                True,    # Is PD\n",
+    "            ))\n",
+    "    if show_sa:\n",
+    "        for conf in configs_sa:\n",
+    "            config_sets.append((\n",
+    "                conf[0], # Replicas\n",
+    "                conf[1], # TP\n",
+    "                0,       # P replicas\n",
+    "                0,       # P TP\n",
+    "                0,       # D replicas\n",
+    "                0,       # D TP\n",
+    "                conf[2], # Directory\n",
+    "                False    # Is PD\n",
+    "            ))\n",
     "else:\n",
     "    pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n",
     "    sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n",
     "    configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n",
     "    configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n",
-    "    for conf in configs_pd:\n",
-    "        config_sets.append((\n",
-    "            None,    # Replicas\n",
-    "            None,    # TP\n",
-    "            conf[0], # P replicas\n",
-    "            conf[1], # P TP\n",
-    "            conf[2], # D replicas\n",
-    "            conf[3], # D TP\n",
-    "            None,    # Directory\n",
-    "            True     # Is PD\n",
-    "        ))\n",
-    "    for conf in configs_sa:\n",
-    "        config_sets.append((\n",
-    "            conf[0], # Replicas\n",
-    "            conf[0], # TP\n",
-    "            None,    # P replicas\n",
-    "            None,    # P TP\n",
-    "            None,    # D replicas\n",
-    "            None,    # D TP\n",
-    "            None,    # Directory\n",
-    "            False    # Is PD\n",
-    "        ))\n",
+    "    if show_pd:\n",
+    "        for conf in configs_pd:\n",
+    "            config_sets.append((\n",
+    "                0,       # Replicas\n",
+    "                0,       # TP\n",
+    "                conf[0], # P replicas\n",
+    "                conf[1], # P TP\n",
+    "                conf[2], # D replicas\n",
+    "                conf[3], # D TP\n",
+    "                0,       # Directory\n",
+    "                True,    # Is PD\n",
+    "            ))\n",
+    "    if show_sa:\n",
+    "        for conf in configs_sa:\n",
+    "            config_sets.append((\n",
+    "                conf[0], # Replicas\n",
+    "                conf[1], # TP\n",
+    "                0,       # P replicas\n",
+    "                0,       # P TP\n",
+    "                0,       # D replicas\n",
+    "                0,       # D TP\n",
+    "                0,       # Directory\n",
+    "                False    # Is PD\n",
+    "            ))\n",
     "\n",
     "# Sort so prinouts/plots are organized\n",
     "config_sets.sort()\n",
@@ -599,6 +615,12 @@
     "        'is_pd': conf[7],\n",
     "    })\n",
     "\n",
+    "if not configs:\n",
+    "    if show_pd:\n",
+    "        print('No P/D configurations for this scenario!')\n",
+    "    if show_sa:\n",
+    "        print('No standalone configurations for this scenario!')\n",
+    "\n",
     "# Sweep through configurations\n",
     "for ii, conf in enumerate(configs):\n",
     "    is_pd = 'P_TP' in conf\n",
@@ -612,7 +634,9 @@
     "                (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n",
     "                (pd_runs_selected['D_TP'] == conf['d_tp']) &\n",
     "                (pd_runs_selected['Directory'] == conf['dir'])\n",
-    "            ].sort_values(by='Concurrency')\n",
+    "            ].drop('Directory', axis=1).sort_values(by='Concurrency')\n",
+    "\n",
+    "            print(pd_runs_selected.iloc[0]['Directory'])\n",
     "        else:\n",
     "            conf_df = pd_runs_selected[\n",
     "                (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n",
@@ -621,6 +645,7 @@
     "                (pd_runs_selected['D_TP'] == conf['d_tp'])\n",
     "            ].sort_values(by='Concurrency')\n",
     "\n",
+    "        \n",
     "        # Print table\n",
     "        display(conf_df)\n",
     "    \n",
@@ -641,7 +666,9 @@
     "                (sa_runs_selected['Replicas'] == conf['rep']) &\n",
     "                (sa_runs_selected['TP'] == conf['tp']) &\n",
     "                (sa_runs_selected['Directory'] == conf['dir'])\n",
-    "            ].sort_values(by='Concurrency')\n",
+    "            ].drop('Directory', axis=1).sort_values(by='Concurrency')\n",
+    "\n",
+    "            print(sa_runs_selected.iloc[0]['Directory'])\n",
     "        else:\n",
     "            conf_df = sa_runs_selected[\n",
     "                (sa_runs_selected['Replicas'] == conf['rep']) &\n",
@@ -662,19 +689,20 @@
     "                     list(conf_df.Thpt_per_GPU)[jj]+sa_runs_selected['Thpt_per_GPU'].max()*0.02,\n",
     "                     str(val), ha='center', color=colors[ii%len(colors)])\n",
     "\n",
-    "plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl}  OSL: {osl}')\n",
-    "plt.xlabel('Tok/s/User', fontsize='16')\n",
-    "plt.ylabel('Tok/s/GPU', fontsize='16')\n",
-    "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
-    "plt.grid(True, linewidth=1, ls='--', color='gray')\n",
-    "plt.axis([0, None, 0, None])\n",
-    "plt.show()\n"
+    "if configs:\n",
+    "    plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl}  OSL: {osl}')\n",
+    "    plt.xlabel('Tok/s/User', fontsize='16')\n",
+    "    plt.ylabel('Tok/s/GPU', fontsize='16')\n",
+    "    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
+    "    plt.grid(True, linewidth=1, ls='--', color='gray')\n",
+    "    plt.axis([0, None, 0, None])\n",
+    "    plt.show()\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d5299c3c-65cb-48e3-b381-6fe8b89a26a0",
+   "id": "e1f5bfca-b25a-46d3-84e7-b4fd423c50f6",
    "metadata": {},
    "outputs": [],
    "source": []