Commit e391d47

[COPILOT] add nb generation

1 parent 305a9a1 commit e391d47
File tree

7 files changed: +385 -53 lines

setup.py

Lines changed: 1 addition & 0 deletions

@@ -54,6 +54,7 @@
     "matplotlib",
     "seaborn",
     "scalene",
+    "nbformat>=5.0",
 ]

 setup_requires = ["setuptools_scm"]
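
The new nbformat>=5.0 pin supplies the notebook reader/writer used by the generation module added later in this commit. A quick sanity check of the dependency and the v4 schema the template targets (a minimal sketch; only the pinned nbformat public API is assumed):

import nbformat

print(nbformat.__version__)      # expect >= 5.0 per the new pin
nb = nbformat.v4.new_notebook()  # nbformat 4 is the schema used by analysis_template.ipynb
print(nb.nbformat, nb.nbformat_minor)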
src/vivarium_profiling/templates/__init__.py

Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+"""Templates for vivarium_profiling."""
+
+from pathlib import Path
+
+TEMPLATES_DIR = Path(__file__).parent
+ANALYSIS_NOTEBOOK_TEMPLATE = TEMPLATES_DIR / "analysis_template.ipynb"
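
Because TEMPLATES_DIR is derived from __file__, ANALYSIS_NOTEBOOK_TEMPLATE resolves inside the installed package, so the .ipynb must ship as package data (the packaging configuration for that is not part of the diff shown here). A minimal check, assuming the package is installed with the template included:

from vivarium_profiling.templates import ANALYSIS_NOTEBOOK_TEMPLATE

print(ANALYSIS_NOTEBOOK_TEMPLATE)           # .../vivarium_profiling/templates/analysis_template.ipynb
assert ANALYSIS_NOTEBOOK_TEMPLATE.exists()  # holds only if the .ipynb is included in the distribution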
src/vivarium_profiling/templates/analysis_template.ipynb

Lines changed: 216 additions & 0 deletions

@@ -0,0 +1,216 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "072e8e0a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from pathlib import Path\n",
+    "from vivarium_profiling.tools.extraction import ExtractionConfig\n",
+    "from vivarium_profiling.tools import plotting\n",
+    "\n",
+    "# Configure matplotlib for notebook\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b7058668",
+   "metadata": {},
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9241f5cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load benchmark results\n",
+    "benchmark_results_path = Path(r\"{{BENCHMARK_RESULTS_PATH}}\")\n",
+    "summary_path = Path(r\"{{SUMMARY_PATH}}\")\n",
+    "\n",
+    "raw = pd.read_csv(benchmark_results_path)\n",
+    "summary = pd.read_csv(summary_path)\n",
+    "\n",
+    "# Load extraction config\n",
+    "config = ExtractionConfig()\n",
+    "\n",
+    "print(f\"Loaded {len(raw)} raw benchmark results\")\n",
+    "print(f\"Loaded {len(summary)} model summaries\")\n",
+    "print(f\"\\nRaw data shape: {raw.shape}\")\n",
+    "print(f\"Summary data shape: {summary.shape}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c47df2b",
+   "metadata": {},
+   "source": [
+    "## Performance Analysis\n",
+    "\n",
+    "Overall runtime and memory usage comparison across models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22bb73fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plotting.create_figures(\n",
+    "    summary,\n",
+    "    output_dir=None,\n",
+    "    chart_title=\"performance_analysis\",\n",
+    "    time_col=\"rt_s\",\n",
+    "    mem_col=\"mem_mb\",\n",
+    "    time_pdiff_col=\"rt_s_pdiff\",\n",
+    "    save=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7e31e5f0",
+   "metadata": {},
+   "source": [
+    "## Phase Runtime Analysis\n",
+    "\n",
+    "Detailed analysis of individual simulation phases (setup, initialize_simulants, run, finalize, report)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee250c94",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get phase metrics from config\n",
+    "phase_patterns = [p for p in config.patterns if p.cumtime_template == \"rt_{name}_s\"]\n",
+    "\n",
+    "for pattern in phase_patterns:\n",
+    "    time_col = pattern.cumtime_col\n",
+    "    time_pdiff_col = f\"{time_col}_pdiff\"\n",
+    "    \n",
+    "    print(f\"\\n=== {pattern.name.upper()} ===\")\n",
+    "    plotting.create_figures(\n",
+    "        summary,\n",
+    "        output_dir=None,\n",
+    "        chart_title=f\"runtime_analysis_{pattern.name}\",\n",
+    "        time_col=time_col,\n",
+    "        mem_col=None,\n",
+    "        time_pdiff_col=time_pdiff_col,\n",
+    "        save=False\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f07476e",
+   "metadata": {},
+   "source": [
+    "## Non-Run Time Analysis\n",
+    "\n",
+    "Analysis of time spent outside the main run phase (setup, initialization, reporting, etc.)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bf6f0d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plotting.create_figures(\n",
+    "    summary,\n",
+    "    output_dir=None,\n",
+    "    chart_title=\"runtime_analysis_non_run\",\n",
+    "    time_col=\"rt_non_run_s\",\n",
+    "    mem_col=None,\n",
+    "    time_pdiff_col=\"rt_non_run_s_pdiff\",\n",
+    "    save=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa16a06d",
+   "metadata": {},
+   "source": [
+    "## Bottleneck Cumulative Time Analysis\n",
+    "\n",
+    "Analysis of cumulative time spent in known bottleneck functions (gather_results, pipeline_call, population_get)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "691b5377",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get bottleneck patterns from config\n",
+    "bottleneck_patterns = [\n",
+    "    p for p in config.patterns\n",
+    "    if p.extract_cumtime and p.cumtime_col == f\"{p.name}_cumtime\"\n",
+    "]\n",
+    "\n",
+    "for pattern in bottleneck_patterns:\n",
+    "    time_col = pattern.cumtime_col\n",
+    "    time_pdiff_col = f\"{time_col}_pdiff\"\n",
+    "    \n",
+    "    print(f\"\\n=== {pattern.name.upper()} ===\")\n",
+    "    plotting.create_figures(\n",
+    "        summary,\n",
+    "        output_dir=None,\n",
+    "        chart_title=f\"bottleneck_runtime_analysis_{pattern.name}\",\n",
+    "        time_col=time_col,\n",
+    "        mem_col=None,\n",
+    "        time_pdiff_col=time_pdiff_col,\n",
+    "        save=False\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4f267afb",
+   "metadata": {},
+   "source": [
+    "## Bottleneck Fractions vs Scale Factor\n",
+    "\n",
+    "Fraction of run() time spent in each bottleneck function, plotted against model scale factor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ddcc58f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plotting.plot_bottleneck_fractions(\n",
+    "    summary,\n",
+    "    output_dir=None,\n",
+    "    config=config,\n",
+    "    metric=\"median\",\n",
+    "    save=False\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
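
The template leaves execution_count null and outputs empty, so its cells run only when the generated notebook is opened. To execute a generated analysis.ipynb headlessly, something like the following works — a sketch assuming nbconvert is also installed (this commit pins only nbformat):

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

with open("analysis.ipynb") as f:
    nb = nbformat.read(f, as_version=4)

# Run all cells in the notebook's own directory so the CSV paths resolve
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(nb, {"metadata": {"path": "."}})

with open("analysis.ipynb", "w") as f:
    nbformat.write(nb, f)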

src/vivarium_profiling/tools/cli.py

Lines changed: 13 additions & 1 deletion

@@ -293,10 +293,19 @@ def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
     is_flag=True,
     help="Drop into python debugger if an error occurs.",
 )
+@click.option(
+    "--nb",
+    is_flag=True,
+    help=(
+        "Generate a Jupyter notebook for interactive analysis. "
+        "If summary.csv already exists, skip summary generation."
+    ),
+)
 def summarize(
     benchmark_results: str,
     verbose: int,
     with_debugger: bool,
+    nb: bool,
 ) -> None:
     """Summarize benchmark results and create visualizations.

@@ -311,10 +320,13 @@ def summarize(
     - bottleneck_runtime_analysis_*.png: Bottleneck cumtime charts
     - bottleneck_fraction_*.png: Bottleneck fraction scaling charts

+    If --nb is specified, also creates:
+    - analysis.ipynb: Interactive Jupyter notebook with all plots
+
     Example usage:
         summarize results/profile_2026_01_07/benchmark_results.csv
     """
     configure_logging_to_terminal(verbose)
     benchmark_results_path = Path(benchmark_results)
     main = handle_exceptions(run_summarize_analysis, logger, with_debugger=with_debugger)
-    main(benchmark_results_path)
+    main(benchmark_results_path, nb=nb)
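
With the new flag, the docstring's example invocation becomes:

    summarize results/profile_2026_01_07/benchmark_results.csv --nb

Per the help text, if summary.csv already exists the summary-generation step is skipped and only the notebook is produced.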
Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+"""Generate Jupyter notebooks for interactive benchmark analysis."""
+
+from pathlib import Path
+
+import nbformat as nbf
+from loguru import logger
+
+from vivarium_profiling.templates import ANALYSIS_NOTEBOOK_TEMPLATE
+
+
+def create_analysis_notebook(
+    benchmark_results_path: Path,
+    summary_path: Path,
+    output_path: Path,
+) -> None:
+    """Create a Jupyter notebook for interactive benchmark analysis.
+
+    Loads a template notebook and substitutes file paths.
+
+    Parameters
+    ----------
+    benchmark_results_path
+        Path to benchmark_results.csv file.
+    summary_path
+        Path to summary.csv file.
+    output_path
+        Path where the notebook should be saved (e.g., analysis.ipynb).
+
+    """
+    # Define substitutions
+    substitutions = {
+        "{{BENCHMARK_RESULTS_PATH}}": str(benchmark_results_path),
+        "{{SUMMARY_PATH}}": str(summary_path),
+    }
+
+    # Load template
+    with open(ANALYSIS_NOTEBOOK_TEMPLATE) as f:
+        nb = nbf.read(f, as_version=4)
+
+    # Apply substitutions to all code cells
+    for cell in nb.cells:
+        if cell.cell_type == "code":
+            source = cell.source
+            for placeholder, value in substitutions.items():
+                source = source.replace(placeholder, value)
+            cell.source = source
+
+    # Save the notebook
+    with open(output_path, "w") as f:
+        nbf.write(nb, f)
+
+    logger.info(f"Created analysis notebook: {output_path}")
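
Note that the template wraps both placeholders in Path(r"...") raw strings, so substituted Windows paths keep their backslashes without escaping. A usage sketch follows; the new module's filename is not captured in this diff, so the import path below is an assumption:

from pathlib import Path

# Hypothetical import path: the module's location is not shown in this commit.
from vivarium_profiling.tools.nb_generation import create_analysis_notebook

results_dir = Path("results/profile_2026_01_07")
create_analysis_notebook(
    benchmark_results_path=results_dir / "benchmark_results.csv",
    summary_path=results_dir / "summary.csv",
    output_path=results_dir / "analysis.ipynb",
)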
