|
| 1 | +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES |
| 2 | +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +import logging |
| 20 | +from dataclasses import dataclass |
| 21 | +from pathlib import Path |
| 22 | +from typing import TYPE_CHECKING |
| 23 | + |
| 24 | +import jinja2 |
| 25 | +import toml |
| 26 | +from rich.console import Console |
| 27 | +from rich.table import Table |
| 28 | + |
| 29 | +from cloudai.core import Reporter, System, TestScenario |
| 30 | +from cloudai.models.scenario import ReportConfig |
| 31 | +from cloudai.util.lazy_imports import lazy |
| 32 | + |
| 33 | +from .nixl_bench import NIXLBenchTestDefinition |
| 34 | + |
| 35 | +if TYPE_CHECKING: |
| 36 | + import bokeh.plotting as bk |
| 37 | + import pandas as pd |
| 38 | + |
| 39 | + |
@dataclass
class TdefResult:
    """Pair a NIXL Bench test definition with the results table collected for the same test run."""

    # Test definition describing how the benchmark was configured.
    tdef: NIXLBenchTestDefinition
    # Benchmark measurements for that test definition, held as a DataFrame.
    results: pd.DataFrame
| 46 | + |
| 47 | + |
class NIXLBenchSummaryReport(Reporter):
    """
    Summary report for NIXL Bench.

    For every configured (operation type, metric) pair the report contains one Rich table and one Bokeh chart.
    Everything is rendered through a Jinja2 template into ``nixl_summary.html`` under ``results_root``.
    """

    def __init__(self, system: System, test_scenario: TestScenario, results_root: Path, config: ReportConfig) -> None:
        """Store reporter arguments and set up the metric labels and the list of reported combinations."""
        super().__init__(system, test_scenario, results_root, config)
        # Filled by `load_tdef_with_results`: one entry per NIXL Bench test run.
        self.tdef_res: list[TdefResult] = []
        # Maps a metric column of the results CSV to a human-readable label.
        self.metric2col = {
            "avg_lat": "Avg. Latency (us)",
            "bw_gb_sec": "Bandwidth (GB/sec)",
        }
        # (operation type, metric) pairs; each one produces a table and a chart, in this order.
        self.report_configs = [
            ("READ", "bw_gb_sec"),
            ("WRITE", "bw_gb_sec"),
            ("READ", "avg_lat"),
            ("WRITE", "avg_lat"),
        ]

    def generate(self) -> None:
        """Load results, print the summary tables to the console and write the HTML report."""
        self.load_tdef_with_results()

        # `record=True` keeps everything printed so it can be exported as HTML below.
        console = Console(record=True)
        for op_type, metric in self.report_configs:
            console.print(self.create_table(op_type, metric))
            console.print()

        bokeh_script, bokeh_div = self.get_bokeh_html()

        template = jinja2.Environment(
            loader=jinja2.FileSystemLoader(Path(__file__).parent.parent.parent / "util")
        ).get_template("nixl_report_template.jinja2")
        html_content = template.render(
            title=f"{self.test_scenario.name} NIXL Bench Report",
            bokeh_script=bokeh_script,
            bokeh_div=bokeh_div,
            rich_html=console.export_html(),
        )

        html_file = self.results_root / "nixl_summary.html"
        # Explicit UTF-8: the exported Rich HTML may contain non-ASCII characters (e.g. table borders),
        # which would fail to encode with a non-UTF-8 platform default.
        with open(html_file, "w", encoding="utf-8") as f:
            f.write(html_content)

        logging.info(f"NIXL summary report created: {html_file}")

    def load_tdef_with_results(self) -> None:
        """
        Populate `self.tdef_res` with the test definition and results of every NIXL Bench test run.

        Test runs of other workloads are dropped from `self.trs`. A run that lacks `test-run.toml` or
        `nixlbench.csv` in its output directory is skipped with a warning instead of aborting the report.
        """
        super().load_test_runs()
        self.trs = [tr for tr in self.trs if isinstance(tr.test.test_definition, NIXLBenchTestDefinition)]

        # Start from scratch so that calling this method more than once does not duplicate results.
        self.tdef_res = []
        for tr in self.trs:
            toml_path = tr.output_path / "test-run.toml"
            csv_path = tr.output_path / "nixlbench.csv"
            if not toml_path.is_file() or not csv_path.is_file():
                logging.warning(f"Skipping {tr.output_path}: test-run.toml or nixlbench.csv not found")
                continue

            tr_file = toml.load(toml_path)
            tdef = NIXLBenchTestDefinition.model_validate(tr_file["test_definition"])
            self.tdef_res.append(TdefResult(tdef, lazy.pd.read_csv(csv_path)))

    def create_table(self, op_type: str, metric: str) -> Table:
        """
        Build a Rich table with the values of `metric` for all loaded test runs of type `op_type`.

        The first two columns are block size and batch size; every following column is one
        "initiator->target" segment type combination as produced by `construct_df`.
        """
        df = self.construct_df(op_type, metric)
        table = Table(title=f"{self.test_scenario.name}: {op_type} {self.metric2col[metric]}", title_justify="left")
        for col in df.columns:
            table.add_column(col, justify="right", style="cyan")

        for _, row in df.iterrows():
            # `iterrows` upcasts mixed int/float rows to float, so convert the sizes back to integers.
            block_size = int(row["block_size"])
            batch_size = int(row["batch_size"])
            table.add_row(str(block_size), str(batch_size), *[str(x) for x in row.values[2:]])
        return table

    def get_bokeh_html(self) -> tuple[str, str]:
        """
        Render all charts into embeddable HTML.

        Returns:
            The ``(script, div)`` pair produced by Bokeh's `components` for a layout with two charts per row.
        """
        charts: list[bk.figure] = []
        for op_type, metric in self.report_configs:
            chart = self.create_chart(op_type, metric)
            if chart is not None:
                charts.append(chart)

        # Two charts per row; a trailing odd chart gets a row of its own.
        rows = [lazy.bokeh_layouts.row(*charts[i : i + 2]) for i in range(0, len(charts), 2)]
        layout = lazy.bokeh_layouts.column(*rows, name="charts_layout")

        bokeh_script, bokeh_div = lazy.bokeh_embed.components(layout)
        return bokeh_script, bokeh_div

    def construct_df(self, op_type: str, metric: str) -> pd.DataFrame:
        """
        Construct a `DataFrame` with results for all test runs.

        Block size and Batch size are taken only once assuming they are the same across all test runs.
        `op_type` is used to filter the test runs.

        Every matching test run contributes one column named "<initiator_seg_type>-><target_seg_type>"
        holding its `metric` values. Runs sharing the same segment type pair overwrite each other's column.
        The returned frame is empty when no test run matches `op_type`.
        """
        final_df = lazy.pd.DataFrame()

        for tdef_res in self.tdef_res:
            cmd_args = tdef_res.tdef.cmd_args_dict
            if cmd_args.get("op_type", "unset") != op_type:
                continue
            if final_df.empty:
                final_df["block_size"] = tdef_res.results["block_size"].astype(int)
                final_df["batch_size"] = tdef_res.results["batch_size"].astype(int)

            col_name = f"{cmd_args.get('initiator_seg_type', 'unset')}->{cmd_args.get('target_seg_type', 'unset')}"
            final_df[col_name] = tdef_res.results[metric].astype(float)

        return final_df

    def create_chart(self, op_type: str, metric: str) -> bk.figure | None:
        """
        Create a line chart of `metric` versus block size for test runs of type `op_type`.

        Values are averaged per block size and one line is drawn per segment type combination.
        The x axis is logarithmic.

        Returns:
            The Bokeh figure, or ``None`` (with a warning logged) when there is no data to plot.
        """
        df = self.construct_df(op_type, metric)
        if df.empty:
            logging.warning(f"Empty DataFrame for {op_type} {metric}")
            return None

        numeric_cols = [col for col in df.columns if col not in ["block_size", "batch_size"]]
        # Collapse rows sharing a block size (e.g. different batch sizes) into their mean.
        grouped_df = df.groupby("block_size")[numeric_cols].mean().reset_index()

        colors = ["blue", "red", "green", "orange", "purple", "brown", "pink", "gray"]
        # Colors are reused cyclically when there are more series than palette entries.
        y_columns = [(col, colors[i % len(colors)]) for i, col in enumerate(numeric_cols)]

        p = lazy.bokeh_plotting.figure(
            title=f"{op_type} {self.metric2col[metric]} vs Block Size",
            x_axis_label="Block Size",
            y_axis_label=self.metric2col[metric],
            width=800,
            height=500,
            tools="pan,box_zoom,wheel_zoom,reset,save",
            active_drag="pan",
            active_scroll="wheel_zoom",
            x_axis_type="log",
        )

        hover = lazy.bokeh_models.HoverTool(
            tooltips=[("Block Size", "@x"), ("Value", "@y"), ("Segment Type", "@segment_type")]
        )
        p.add_tools(hover)

        for col, color in y_columns:
            source = lazy.bokeh_models.ColumnDataSource(
                data={
                    "x": grouped_df["block_size"].tolist(),
                    "y": grouped_df[col].tolist(),
                    "segment_type": [col] * len(grouped_df),
                }
            )

            p.line("x", "y", source=source, line_color=color, line_width=2, legend_label=col)
            p.scatter("x", "y", source=source, fill_color=color, size=8, legend_label=col)

        p.legend.location = "top_left"
        p.legend.click_policy = "hide"

        y_max = float(grouped_df[numeric_cols].max().max())
        y_min = float(grouped_df[numeric_cols].min().min())
        # Only set an explicit range when it is well defined; for an all-zero or all-NaN data set the
        # comparison is False and Bokeh's automatic range is kept.
        if y_max > 0:
            # Leave a margin of 1% of the maximum below the lowest value (or below zero, whichever is
            # smaller) and 10% headroom above the maximum. The margin must scale with the data only
            # once: multiplying the minimum by the maximum pushes the lower bound far below zero.
            margin = 0.01 * y_max
            p.y_range = lazy.bokeh_models.Range1d(start=min(y_min, 0.0) - margin, end=y_max * 1.1)

        return p
0 commit comments