Skip to content

Commit 72e7552

Browse files
authored
Add yaml configuration/ parsing of Extraction Configuration (#12)
* [COPILOT] refactor extraction code to separate module * format * [COPILOT] consolidate benchmark and phase configs. * refactor extraction to create a 'configuration' * remove unused imports * fix method sig * minor fixes * cleanup * add basic unit tests * add back result summary columns * make callpattern more ergonomic * condense * add cli for summarization * [COPILOT] Add tests * edits for readability * change nan check to warning * format * add summarize run at the end of the run_benchmark loop * [COPILOT] extract plotting functions to new module * [COPILOT] refactor plots * adjust so that we only create fractions for bottleneck patterns, which are defined in a particular way. * make bottleneck patterns more strict * [COPILOT] add nb generation * adjust organization * format * [COPILOT] add explicit extraction configuration * remove preset * remove other examples * consolidate tests * format * add context manager * use a fixture instead * fix typo * remove duplicate param * rename callpattern * add line number to extraction * add test to ensure we can select correct line * use pipeline call as ex instead * format * updates * format * rename callpattern * adjust test * add name to tests
1 parent 47870d1 commit 72e7552

File tree

5 files changed

+391
-5
lines changed

5 files changed

+391
-5
lines changed

extraction_config_example.yaml

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Example Extraction Configuration
2+
#
3+
# This file demonstrates the YAML syntax for configuring extraction patterns
4+
# that define which profiling metrics to extract from benchmark results.
5+
#
6+
# Usage:
7+
# run_benchmark -m models/*.yaml -r 10 -b 20 --extraction-config extraction_config.yaml
8+
# summarize results/profile_*/benchmark_results.csv --extraction-config extraction_config.yaml
9+
#
10+
# Each pattern defines:
11+
# - What function to match in cProfile output (filename + function_name)
12+
# - Which metrics to extract (cumtime, percall, ncalls) - all optional with defaults
13+
# - How to name the resulting columns (templates) - optional with defaults
14+
#
15+
# Common use cases:
16+
#
17+
# 1. Bottlenecks: Extract all 3 metrics (cumtime, percall, ncalls)
18+
# Use for performance hotspots you want to track in detail
19+
#
20+
# 2. Simulation phases: Extract only cumtime with "rt_{name}_s" column naming
21+
# Use for high-level simulation phases
22+
#
23+
# 3. Custom patterns: Mix and match metrics and templates as needed
24+
25+
patterns:
26+
# ===========================================================================
27+
# BOTTLENECK PATTERNS
28+
# ===========================================================================
29+
# These extract cumtime, percall, and ncalls for detailed bottleneck analysis
30+
31+
- name: gather_results
32+
filename: results/manager.py
33+
function_name: gather_results
34+
extract_cumtime: true
35+
extract_percall: true
36+
extract_ncalls: true
37+
# Generates columns: gather_results_cumtime, gather_results_percall, gather_results_ncalls
38+
39+
- name: pipeline_call
40+
filename: values/pipeline.py
41+
function_name: __call__
42+
extract_cumtime: true
43+
extract_percall: true
44+
extract_ncalls: true
45+
# Generates columns: pipeline_call_cumtime, pipeline_call_percall, pipeline_call_ncalls
46+
47+
- name: population_get
48+
filename: population/population_view.py
49+
function_name: get
50+
extract_cumtime: true
51+
extract_percall: true
52+
extract_ncalls: true
53+
# Generates columns: population_get_cumtime, population_get_percall, population_get_ncalls
54+
55+
# ===========================================================================
56+
# SIMULATION PHASE PATTERNS
57+
# ===========================================================================
58+
# These extract only cumtime for high-level simulation phases
59+
# Each sets cumtime_template to "rt_{name}_s" so the runtime column is named rt_<name>_s
60+
61+
- name: setup
62+
filename: /vivarium/framework/engine.py
63+
function_name: setup
64+
extract_cumtime: true
65+
cumtime_template: "rt_{name}_s"
66+
# extract_percall and extract_ncalls default to false
67+
# Generates column: rt_setup_s
68+
69+
- name: initialize_simulants
70+
filename: /vivarium/framework/engine.py
71+
function_name: initialize_simulants
72+
extract_cumtime: true
73+
cumtime_template: "rt_{name}_s"
74+
# Generates column: rt_initialize_simulants_s
75+
76+
- name: run
77+
filename: /vivarium/framework/engine.py
78+
function_name: run
79+
extract_cumtime: true
80+
cumtime_template: "rt_{name}_s"
81+
# Generates column: rt_run_s
82+
83+
- name: finalize
84+
filename: /vivarium/framework/engine.py
85+
function_name: finalize
86+
extract_cumtime: true
87+
cumtime_template: "rt_{name}_s"
88+
# Generates column: rt_finalize_s
89+
90+
- name: report
91+
filename: /vivarium/framework/engine.py
92+
function_name: report
93+
extract_cumtime: true
94+
cumtime_template: "rt_{name}_s"
95+
# Generates column: rt_report_s
96+
97+
# ===========================================================================
98+
# CUSTOM PATTERN EXAMPLES
99+
# ===========================================================================
100+
101+
# Example: Custom function with selective metric extraction and custom templates
102+
# - name: my_bottleneck
103+
# filename: my/module.py
104+
# function_name: my_function
105+
# extract_cumtime: true
106+
# extract_percall: true
107+
# extract_ncalls: false
108+
# cumtime_template: "{name}_total_time"
109+
# percall_template: "{name}_avg_time"
110+
# # Generates columns: my_bottleneck_total_time, my_bottleneck_avg_time
111+
112+
# Example: Minimal pattern (only cumtime with default template)
113+
# - name: simple_func
114+
# filename: simple/module.py
115+
# function_name: simple_function
116+
# # extract_cumtime defaults to true
117+
# # extract_percall and extract_ncalls default to false
118+
# # cumtime_template defaults to "{name}_cumtime"
119+
# # Generates column: simple_func_cumtime
120+
121+
# ===========================================================================
122+
# FIELD REFERENCE
123+
# ===========================================================================
124+
#
125+
# Required fields:
126+
# name: Logical name used in column name templates (e.g., "setup", "gather_results")
127+
# filename: Path pattern to match the source file (can be a partial path)
128+
# function_name: Name of the function to match in cProfile output
129+
#
130+
# Optional fields (with defaults):
131+
# extract_cumtime: Extract cumulative time (default: true)
132+
# extract_percall: Extract time per call (default: false)
133+
# extract_ncalls: Extract number of calls (default: false)
134+
# cumtime_template: Column name template for cumtime (default: "{name}_cumtime")
135+
# percall_template: Column name template for percall (default: "{name}_percall")
136+
# ncalls_template: Column name template for ncalls (default: "{name}_ncalls")
137+
#
138+
# Template variables:
139+
# {name}: Replaced with the pattern's name field
140+
#
141+
# File path matching:
142+
# - Patterns match any path ending with the specified filename
143+
# - Use forward slashes even on Windows
144+
# - Special regex characters (., *, etc.) are automatically escaped
145+
# - Example: "results/manager.py" matches "/full/path/to/results/manager.py"

src/vivarium_profiling/tools/cli.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from vivarium_profiling.constants import metadata, paths
1313
from vivarium_profiling.tools import build_artifacts, configure_logging_to_terminal
14+
from vivarium_profiling.tools.extraction import ExtractionConfig
1415
from vivarium_profiling.tools.run_benchmark import run_benchmark_loop
1516
from vivarium_profiling.tools.summarize import run_summarize_analysis
1617

@@ -228,6 +229,11 @@ def make_artifacts(
228229
type=click.Path(exists=True, file_okay=False, dir_okay=True),
229230
help="Directory where the timestamped results directory will be created.",
230231
)
232+
@click.option(
233+
"--extraction-config",
234+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
235+
help="Path to YAML file defining extraction patterns. If not provided, uses default patterns.",
236+
)
231237
@click.option("-v", "verbose", count=True, help="Configure logging verbosity.")
232238
@click.option(
233239
"--pdb",
@@ -240,6 +246,7 @@ def run_benchmark(
240246
model_runs: int,
241247
baseline_model_runs: int,
242248
output_dir: str,
249+
extraction_config: str | None,
243250
verbose: int,
244251
with_debugger: bool,
245252
) -> None:
@@ -256,7 +263,14 @@ def run_benchmark(
256263

257264
# Run benchmarks with error handling
258265
main = handle_exceptions(run_benchmark_loop, logger, with_debugger=with_debugger)
259-
main(model_specifications, model_runs, baseline_model_runs, output_dir, verbose)
266+
main(
267+
model_specifications,
268+
model_runs,
269+
baseline_model_runs,
270+
output_dir,
271+
extraction_config,
272+
verbose,
273+
)
260274

261275

262276
def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
@@ -293,6 +307,11 @@ def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
293307
is_flag=True,
294308
help="Drop into python debugger if an error occurs.",
295309
)
310+
@click.option(
311+
"--extraction-config",
312+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
313+
help="Path to YAML file defining extraction patterns. If not provided, uses default patterns.",
314+
)
296315
@click.option(
297316
"--nb",
298317
is_flag=True,
@@ -305,6 +324,7 @@ def summarize(
305324
benchmark_results: str,
306325
verbose: int,
307326
with_debugger: bool,
327+
extraction_config: str | None,
308328
nb: bool,
309329
) -> None:
310330
"""Summarize benchmark results and create visualizations.
@@ -327,6 +347,12 @@ def summarize(
327347
summarize results/profile_2026_01_07/benchmark_results.csv
328348
"""
329349
configure_logging_to_terminal(verbose)
350+
351+
# Parse extraction config if provided
352+
config = None
353+
if extraction_config is not None:
354+
config = ExtractionConfig.from_yaml(extraction_config)
355+
330356
benchmark_results_path = Path(benchmark_results)
331357
main = handle_exceptions(run_summarize_analysis, logger, with_debugger=with_debugger)
332-
main(benchmark_results_path, nb=nb)
358+
main(benchmark_results_path, config=config, nb=nb)

src/vivarium_profiling/tools/extraction.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,21 @@
1313
from dataclasses import dataclass
1414
from itertools import chain
1515
from pathlib import Path
16+
from typing import Any
1617

18+
import yaml
1719
from loguru import logger
1820

21+
# Default values for optional FunctionCallConfiguration fields
22+
DEFAULT_PATTERN_CONFIG = {
23+
"extract_cumtime": True,
24+
"extract_percall": False,
25+
"extract_ncalls": False,
26+
"cumtime_template": "{name}_cumtime",
27+
"percall_template": "{name}_percall",
28+
"ncalls_template": "{name}_ncalls",
29+
}
30+
1931

2032
@dataclass
2133
class FunctionCallConfiguration:
@@ -203,6 +215,78 @@ def __init__(self, patterns: list[FunctionCallConfiguration] | None = None):
203215
patterns = DEFAULT_BOTTLENECKS + DEFAULT_PHASES
204216
self.patterns = patterns
205217

218+
@classmethod
def from_yaml(cls, yaml_path: str | Path) -> ExtractionConfig:
    """Create an ExtractionConfig from a YAML file.

    Each entry under the top-level ``patterns`` key is merged over
    ``DEFAULT_PATTERN_CONFIG`` (user values win) and then passed as keyword
    arguments to ``FunctionCallConfiguration``.

    Parameters
    ----------
    yaml_path
        Path to the YAML configuration file.

    Returns
    -------
        ExtractionConfig object with patterns defined in the YAML file.

    Raises
    ------
    ValueError
        If the document has no ``patterns`` list, an entry is not a mapping,
        a required field (``name``, ``filename``, ``function_name``) is
        missing, or an entry carries fields that
        ``FunctionCallConfiguration`` rejects.
    FileNotFoundError
        If the YAML file doesn't exist.

    Notes
    -----
    YAML syntax errors are not converted; they propagate unchanged from
    ``yaml.safe_load``.

    Examples
    --------
    YAML format::

        patterns:
          - name: custom_func
            filename: my/module.py
            function_name: my_function
            extract_cumtime: true
            extract_percall: true
            cumtime_template: "custom_{name}_time"

    """
    yaml_path = Path(yaml_path)
    if not yaml_path.exists():
        raise FileNotFoundError(f"YAML config file not found: {yaml_path}")

    # Read as UTF-8 explicitly; the platform default encoding is not
    # guaranteed to be UTF-8 (notably on Windows).
    with yaml_path.open("r", encoding="utf-8") as f:
        config_data = yaml.safe_load(f)

    # An empty ``patterns:`` key loads as None, and a scalar/mapping value is
    # not a list of definitions either -- reject all of these up front with
    # the same message instead of failing later while iterating.
    raw_patterns = config_data.get("patterns") if isinstance(config_data, dict) else None
    if not isinstance(raw_patterns, list):
        raise ValueError(
            "YAML file must contain a 'patterns' key with a list of pattern definitions"
        )

    patterns = []
    for i, pattern_dict in enumerate(raw_patterns):
        if not isinstance(pattern_dict, dict):
            raise ValueError(f"Pattern at index {i} must be a dictionary")

        # Merge defaults with user config (user-supplied values take precedence)
        merged_config = {**DEFAULT_PATTERN_CONFIG, **pattern_dict}

        # Validate required fields
        if "name" not in merged_config:
            raise ValueError(f"Pattern at index {i} is missing required field 'name'")

        name = merged_config["name"]

        if "filename" not in merged_config or "function_name" not in merged_config:
            raise ValueError(
                f"Pattern '{name}' requires 'filename' and 'function_name' fields"
            )

        # Unknown or misspelled keys make the constructor raise TypeError;
        # re-raise as the documented ValueError and say which pattern is at fault.
        try:
            pattern = FunctionCallConfiguration(**merged_config)
        except TypeError as err:
            raise ValueError(f"Pattern '{name}' has invalid fields: {err}") from err

        patterns.append(pattern)

    return cls(patterns=patterns)
289+
206290
@property
207291
def metric_names(self) -> list[str]:
208292
"""Get the names of all configured metrics."""

src/vivarium_profiling/tools/run_benchmark.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def run_benchmark_loop(
148148
model_runs: int,
149149
baseline_model_runs: int,
150150
output_dir: str = ".",
151+
extraction_config: str | Path | None = None,
151152
verbose: int = 0,
152153
) -> str:
153154
"""Main function to run benchmarks on model specifications.
@@ -162,17 +163,20 @@ def run_benchmark_loop(
162163
Number of runs for the baseline model.
163164
output_dir
164165
Directory to save results.
166+
extraction_config
167+
Path to YAML file defining extraction patterns. If None, uses default patterns.
165168
verbose
166169
Verbosity level for logging.
167-
config
168-
Extraction configuration. Defaults to DEFAULT_CONFIG.
169170
170171
Returns
171172
-------
172173
Path to the results directory.
173174
174175
"""
175-
config = ExtractionConfig()
176+
if extraction_config is not None:
177+
config = ExtractionConfig.from_yaml(extraction_config)
178+
else:
179+
config = ExtractionConfig()
176180

177181
configure_logging_to_terminal(verbose)
178182

0 commit comments

Comments
 (0)