
Commit 47870d1

Add option to generate notebook of plots (#11)
* [COPILOT] refactor extraction code to separate module
* format
* [COPILOT] consolidate benchmark and phase configs.
* refactor extraction to create a 'configuration'
* remove unused imports
* fix method sig
* minor fixes
* cleanup
* add basic unit tests
* add back result summary columns
* make callpattern more ergonomic
* condense
* add cli for summarization
* [COPILOT] Add tests
* edits for readability
* change nan check to warning
* format
* add summarize run at the end of the run_benchmark loop
* [COPILOT] extract plotting functions to new module
* [COPILOT] refactor plots
* adjust so that we only create fractions for bottleneck patterns, which are defined in a particular way.
* make bottleneck patterns more strict
* [COPILOT] add nb generation
* adjust organization
* format
* remove duplicate param
* rename callpattern
* add line number to extraction
* add test to ensure we can select correct line
* use pipeline call as ex instead
* format
* updates
* format
1 parent a6bf25c commit 47870d1

File tree

7 files changed: +376 −47 lines


setup.py

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@
         "matplotlib",
         "seaborn",
         "scalene",
+        "nbformat>=5.0",
     ]

     setup_requires = ["setuptools_scm"]

src/vivarium_profiling/templates/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+"""Templates for vivarium_profiling."""
+
+from pathlib import Path
+
+TEMPLATES_DIR = Path(__file__).parent
+ANALYSIS_NOTEBOOK_TEMPLATE = TEMPLATES_DIR / "analysis_template.ipynb"

src/vivarium_profiling/templates/analysis_template.ipynb

Lines changed: 216 additions & 0 deletions
@@ -0,0 +1,216 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "072e8e0a",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import matplotlib.pyplot as plt\n",
+        "from pathlib import Path\n",
+        "from vivarium_profiling.tools.extraction import ExtractionConfig\n",
+        "from vivarium_profiling.tools import plotting\n",
+        "\n",
+        "# Configure matplotlib for notebook\n",
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "b7058668",
+      "metadata": {},
+      "source": [
+        "## Load Data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "9241f5cb",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Load benchmark results\n",
+        "benchmark_results_path = Path(r\"{{BENCHMARK_RESULTS_PATH}}\")\n",
+        "summary_path = Path(r\"{{SUMMARY_PATH}}\")\n",
+        "\n",
+        "raw = pd.read_csv(benchmark_results_path)\n",
+        "summary = pd.read_csv(summary_path)\n",
+        "\n",
+        "# Load extraction config\n",
+        "config = ExtractionConfig()\n",
+        "\n",
+        "print(f\"Loaded {len(raw)} raw benchmark results\")\n",
+        "print(f\"Loaded {len(summary)} model summaries\")\n",
+        "print(f\"\\nRaw data shape: {raw.shape}\")\n",
+        "print(f\"Summary data shape: {summary.shape}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "4c47df2b",
+      "metadata": {},
+      "source": [
+        "## Performance Analysis\n",
+        "\n",
+        "Overall runtime and memory usage comparison across models."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "22bb73fb",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plotting.create_figures(\n",
+        "    summary,\n",
+        "    output_dir=None,\n",
+        "    chart_title=\"performance_analysis\",\n",
+        "    time_col=\"rt_s\",\n",
+        "    mem_col=\"mem_mb\",\n",
+        "    time_pdiff_col=\"rt_s_pdiff\",\n",
+        "    save=False\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7e31e5f0",
+      "metadata": {},
+      "source": [
+        "## Phase Runtime Analysis\n",
+        "\n",
+        "Detailed analysis of individual simulation phases (setup, initialize_simulants, run, finalize, report)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ee250c94",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Get phase metrics from config\n",
+        "phase_patterns = [p for p in config.patterns if p.cumtime_template == \"rt_{name}_s\"]\n",
+        "\n",
+        "for pattern in phase_patterns:\n",
+        "    time_col = pattern.cumtime_col\n",
+        "    time_pdiff_col = f\"{time_col}_pdiff\"\n",
+        "    \n",
+        "    print(f\"\\n=== {pattern.name.upper()} ===\")\n",
+        "    plotting.create_figures(\n",
+        "        summary,\n",
+        "        output_dir=None,\n",
+        "        chart_title=f\"runtime_analysis_{pattern.name}\",\n",
+        "        time_col=time_col,\n",
+        "        mem_col=None,\n",
+        "        time_pdiff_col=time_pdiff_col,\n",
+        "        save=False\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7f07476e",
+      "metadata": {},
+      "source": [
+        "## Non-Run Time Analysis\n",
+        "\n",
+        "Analysis of time spent outside the main run phase (setup, initialization, reporting, etc.)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "0bf6f0d7",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plotting.create_figures(\n",
+        "    summary,\n",
+        "    output_dir=None,\n",
+        "    chart_title=\"runtime_analysis_non_run\",\n",
+        "    time_col=\"rt_non_run_s\",\n",
+        "    mem_col=None,\n",
+        "    time_pdiff_col=\"rt_non_run_s_pdiff\",\n",
+        "    save=False\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "aa16a06d",
+      "metadata": {},
+      "source": [
+        "## Bottleneck Cumulative Time Analysis\n",
+        "\n",
+        "Analysis of cumulative time spent in known bottleneck functions (gather_results, pipeline_call, population_get)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "691b5377",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Get bottleneck patterns from config\n",
+        "bottleneck_patterns = [\n",
+        "    p for p in config.patterns\n",
+        "    if p.extract_cumtime and p.cumtime_col == f\"{p.name}_cumtime\"\n",
+        "]\n",
+        "\n",
+        "for pattern in bottleneck_patterns:\n",
+        "    time_col = pattern.cumtime_col\n",
+        "    time_pdiff_col = f\"{time_col}_pdiff\"\n",
+        "    \n",
+        "    print(f\"\\n=== {pattern.name.upper()} ===\")\n",
+        "    plotting.create_figures(\n",
+        "        summary,\n",
+        "        output_dir=None,\n",
+        "        chart_title=f\"bottleneck_runtime_analysis_{pattern.name}\",\n",
+        "        time_col=time_col,\n",
+        "        mem_col=None,\n",
+        "        time_pdiff_col=time_pdiff_col,\n",
+        "        save=False\n",
+        "    )"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "4f267afb",
+      "metadata": {},
+      "source": [
+        "## Bottleneck Fractions vs Scale Factor\n",
+        "\n",
+        "Fraction of run() time spent in each bottleneck function, plotted against model scale factor."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ddcc58f6",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plotting.plot_bottleneck_fractions(\n",
+        "    summary,\n",
+        "    output_dir=None,\n",
+        "    config=config,\n",
+        "    metric=\"median\",\n",
+        "    save=False\n",
+        ")"
+      ]
+    }
+  ],
+  "metadata": {
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}

src/vivarium_profiling/tools/cli.py

Lines changed: 13 additions & 1 deletion
@@ -293,10 +293,19 @@ def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
     is_flag=True,
     help="Drop into python debugger if an error occurs.",
 )
+@click.option(
+    "--nb",
+    is_flag=True,
+    help=(
+        "Generate a Jupyter notebook for interactive analysis. "
+        "If summary.csv already exists, skip summary generation."
+    ),
+)
 def summarize(
     benchmark_results: str,
     verbose: int,
     with_debugger: bool,
+    nb: bool,
 ) -> None:
     """Summarize benchmark results and create visualizations.

@@ -311,10 +320,13 @@ def summarize(
     - bottleneck_runtime_analysis_*.png: Bottleneck cumtime charts
     - bottleneck_fraction_*.png: Bottleneck fraction scaling charts

+    If --nb is specified, also creates:
+    - analysis.ipynb: Interactive Jupyter notebook with all plots
+
     Example usage:
         summarize results/profile_2026_01_07/benchmark_results.csv
     """
     configure_logging_to_terminal(verbose)
     benchmark_results_path = Path(benchmark_results)
     main = handle_exceptions(run_summarize_analysis, logger, with_debugger=with_debugger)
-    main(benchmark_results_path)
+    main(benchmark_results_path, nb=nb)
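
For context, a rough sketch of how run_summarize_analysis could consume the new nb flag, based on the option's help text (reuse summary.csv when it already exists, then emit the notebook). The actual implementation is not part of this diff, and the import path for the new notebook module below is hypothetical:

    from pathlib import Path

    # Hypothetical module path; the new file's location is not shown in this diff.
    from vivarium_profiling.tools.notebook import NOTEBOOK_NAME, create_analysis_notebook


    def run_summarize_analysis(benchmark_results_path: Path, nb: bool = False) -> None:
        results_dir = benchmark_results_path.parent
        summary_path = results_dir / "summary.csv"

        # When --nb is passed and summary.csv already exists, skip regenerating it,
        # matching the behavior described in the CLI help text.
        if not (nb and summary_path.exists()):
            ...  # existing summarization + static chart generation (not shown here)

        if nb:
            create_analysis_notebook(
                benchmark_results_path=benchmark_results_path,
                summary_path=summary_path,
                output_path=results_dir / NOTEBOOK_NAME,
            )
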
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+"""Generate Jupyter notebooks for interactive benchmark analysis."""
+
+from pathlib import Path
+
+import nbformat as nbf
+from loguru import logger
+
+from vivarium_profiling.templates import ANALYSIS_NOTEBOOK_TEMPLATE
+
+NOTEBOOK_NAME = "analysis.ipynb"
+
+
+def create_analysis_notebook(
+    benchmark_results_path: Path,
+    summary_path: Path,
+    output_path: Path,
+) -> None:
+    """Create a Jupyter notebook for interactive benchmark analysis.
+
+    Loads a template notebook and substitutes file paths.
+
+    Parameters
+    ----------
+    benchmark_results_path
+        Path to benchmark_results.csv file.
+    summary_path
+        Path to summary.csv file.
+    output_path
+        Path where the notebook should be saved (e.g., analysis.ipynb).
+    config
+        Extraction configuration (currently unused, kept for API consistency).
+
+    """
+    # Define substitutions
+    substitutions = {
+        "{{BENCHMARK_RESULTS_PATH}}": str(benchmark_results_path),
+        "{{SUMMARY_PATH}}": str(summary_path),
+    }
+
+    # Load template
+    with open(ANALYSIS_NOTEBOOK_TEMPLATE) as f:
+        nb = nbf.read(f, as_version=4)
+
+    # Apply substitutions to all code cells
+    for cell in nb.cells:
+        if cell.cell_type == "code":
+            source = cell.source
+            for placeholder, value in substitutions.items():
+                source = source.replace(placeholder, value)
+            cell.source = source
+
+    # Save the notebook
+    with open(output_path, "w") as f:
+        nbf.write(nb, f)
+
+    logger.info(f"Created analysis notebook: {output_path}")
