Commit bc3b0bb

Adjust how model specs are passed in

1 parent 2ab644a commit bc3b0bb

File tree: 4 files changed, +45 −52 lines

README.rst

Lines changed: 1 addition & 1 deletion

@@ -178,7 +178,7 @@ depending on the profiler backend provided. By default, runtime profiling is per
 you can also use ``scalene`` for more detailed call stack analysis.

 The ``run_benchmark`` command runs multiple iterations of one or more model specifications, in order to compare
-the results. It requires at least one baseline model (specified as ``model_spec_baseline.yaml``) for comparison,
+the results. It requires at least one baseline model for comparison,
 and any number of 'experiment' models to benchmark against the baseline, which can be passed via glob patterns.
 You can separately configure the sample size of runs for the baseline and experiment models. The command aggregates
 the profiling results and generates summary statistics and visualizations for a default set of important function calls
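Concretely, under the revised interface the baseline spec is a positional argument and experiment specs are passed with -a. The first invocation below is taken from the updated CLI docstring; the glob form in the second is illustrative, based on the option's help text:

    run_benchmark model_spec_baseline.yaml -b 20
    run_benchmark model_spec_baseline.yaml -a "model_spec_*x.yaml" -r 10 -b 20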

src/vivarium_profiling/tools/cli.py

Lines changed: 41 additions & 15 deletions
@@ -200,12 +200,16 @@ def make_artifacts(


 @click.command()
+@click.argument(
+    "model_specification",
+    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
+)
 @click.option(
-    "-m",
-    "--model_specifications",
+    "-a",
+    "--additional-model-specifications",
     multiple=True,
-    required=True,
-    help="Model specification files (supports glob patterns). Can be specified multiple times.",
+    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
+    help="Additional model specification files (supports glob patterns). Can be specified multiple times.",
 )
 @click.option(
     "-r",
@@ -217,8 +221,9 @@ def make_artifacts(
 @click.option(
     "-b",
     "--baseline-model-runs",
+    default=3,
+    show_default=True,
     type=int,
-    required=True,
     help="Number of runs for baseline model.",
 )
 @click.option(
@@ -242,7 +247,8 @@ def make_artifacts(
     help="Drop into python debugger if an error occurs.",
 )
 def run_benchmark(
-    model_specifications: tuple[str, ...],
+    model_specification: str,
+    additional_model_specifications: tuple[str, ...],
     model_runs: int,
     baseline_model_runs: int,
     output_dir: str,
@@ -255,11 +261,21 @@ def run_benchmark(
     This command profiles multiple model specifications and collects runtime
     and memory usage statistics. Results are saved to a timestamped CSV file.

+    The baseline model specification is provided as a positional argument.
+    Additional model specifications can be provided with -a.
+
     Example usage:
-        run_benchmark -m "model_spec_baseline.yaml" -m "model_spec_*.yaml" -r 10 -b 20
+        run_benchmark model_spec_baseline.yaml -b 20
+
+        run_benchmark model_spec_baseline.yaml -a model_spec_2x.yaml -a model_spec_4x.yaml -r 10 -b 20
     """
-    # Expand model patterns
-    model_specifications = _expand_model_specs(list(model_specifications))
+    configure_logging_to_terminal(verbose)
+
+    baseline_path = Path(model_specification)
+
+    # Expand additional model specs (supporting glob patterns)
+    additional_paths = _expand_model_specs(list(additional_model_specifications))
+    model_specifications = [str(baseline_path)] + [str(p) for p in additional_paths]

     # Run benchmarks with error handling
     main = handle_exceptions(run_benchmark_loop, logger, with_debugger=with_debugger)
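The move from a required -m option to a positional argument plus a repeatable -a option is a standard click pattern. A minimal, self-contained sketch of how it behaves, assuming nothing beyond click itself (the command name "demo" is illustrative, not the project's code):

import click


@click.command()
@click.argument("model_specification", type=click.Path(exists=True, dir_okay=False))
@click.option(
    "-a",
    "--additional-model-specifications",
    multiple=True,  # repeated -a flags are collected into a tuple
)
def demo(model_specification: str, additional_model_specifications: tuple[str, ...]) -> None:
    # The positional argument is always required; the tuple may be empty,
    # so a lone baseline spec is a valid invocation.
    click.echo(f"baseline: {model_specification}")
    for extra in additional_model_specifications:
        click.echo(f"additional: {extra}")


if __name__ == "__main__":
    demo()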
@@ -274,7 +290,22 @@ def run_benchmark(


 def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
-    """Expand glob patterns and validate model spec files."""
+    """Expand glob patterns and validate model spec files.
+
+    Parameters
+    ----------
+    model_patterns
+        List of file paths or glob patterns.
+
+    Returns
+    -------
+    List of resolved Path objects for existing files. Returns empty list
+    if no patterns provided.
+
+    """
+    if not model_patterns:
+        return []
+
     models = []
     for pattern in model_patterns:
         expanded = glob.glob(pattern)
@@ -287,11 +318,6 @@ def _expand_model_specs(model_patterns: list[str]) -> list[Path]:
         if path.is_file():
             models.append(path)

-    if not models:
-        raise click.ClickException(
-            f"No model specification files found for patterns: {model_patterns}"
-        )
-
     return models

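With the early return added above, an empty -a list is no longer an error; _expand_model_specs simply yields no extra specs. A rough standalone sketch of the expansion behavior, assuming the same glob-plus-pathlib approach (the function name expand_patterns is hypothetical):

import glob
from pathlib import Path


def expand_patterns(patterns: list[str]) -> list[Path]:
    # Expand each glob pattern and keep only paths that are existing files.
    if not patterns:
        return []  # empty input: nothing to benchmark beyond the baseline
    found: list[Path] = []
    for pattern in patterns:
        for match in glob.glob(pattern):
            path = Path(match).resolve()
            if path.is_file():
                found.append(path)
    return found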

src/vivarium_profiling/tools/run_benchmark.py

Lines changed: 3 additions & 15 deletions
@@ -22,15 +22,6 @@
 RESULTS_SUMMARY_NAME = "benchmark_results.csv"


-def validate_baseline_model(models: list[Path]) -> None:
-    """Validate that one of the model specs is the baseline."""
-    baseline_found = "model_spec_baseline.yaml" in [model.name for model in models]
-    if not baseline_found:
-        raise click.ClickException(
-            "Error: One of the model specs must be 'model_spec_baseline.yaml'."
-        )
-
-
 def create_results_directory(output_dir: str = ".") -> str:
     """Create a timestamped results directory."""
     timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
@@ -180,9 +171,6 @@ def run_benchmark_loop(

     configure_logging_to_terminal(verbose)

-    # Validate inputs
-    validate_baseline_model(model_specifications)
-
     # Create results directory and initialize results file
     results_dir = create_results_directory(output_dir)
     results_file = initialize_results_file(results_dir, config)
@@ -193,15 +181,15 @@ def run_benchmark_loop(
     logger.info(f" Results Directory: {results_dir}")

     # Run benchmarks for each specification
-    for spec in model_specifications:
+    for i, spec in enumerate(model_specifications):
         logger.info(f"Running {spec}...")

         model_spec_name = spec.stem
         spec_specific_results_dir = Path(results_dir) / model_spec_name
         spec_specific_results_dir.mkdir(parents=True, exist_ok=True)

-        # Determine number of runs
-        if spec.name == "model_spec_baseline.yaml":
+        # Determine number of runs - first spec is baseline
+        if i == 0:
             num_runs = baseline_model_runs
         else:
             num_runs = model_runs
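The baseline is now identified by position (the CLI places the positional spec at index 0) rather than by the filename model_spec_baseline.yaml. A toy illustration of the run-count selection; the file names and counts here are made up:

# Illustrative only: the first spec is treated as the baseline.
specs = ["my_baseline.yaml", "model_spec_2x.yaml", "model_spec_4x.yaml"]
baseline_model_runs, model_runs = 20, 10  # hypothetical -b / -r values

for i, spec in enumerate(specs):
    num_runs = baseline_model_runs if i == 0 else model_runs
    print(f"{spec}: {num_runs} run(s)")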

tests/test_run_benchmark.py

Lines changed: 0 additions & 21 deletions
@@ -104,24 +104,3 @@ def test_run_benchmark_loop_integration(test_model_specs: list[Path], tmp_path:
     assert (
         len(spec_contents) > 0
     ), f"Model spec directory {spec_path} should contain results"
-
-
-def test_run_benchmark_loop_validation_error(test_model_specs: list[Path], tmp_path: Path):
-    """Test that benchmark fails appropriately when baseline model is missing."""
-    output_dir = str(tmp_path / "validation_test")
-    Path(output_dir).mkdir(parents=True, exist_ok=True)
-
-    # Try to run without baseline model - should raise exception
-    model_specs = test_model_specs[1:]
-
-    with pytest.raises(
-        click.ClickException,
-        match="Error: One of the model specs must be 'model_spec_baseline.yaml'.",
-    ):  # Should raise ClickException about missing baseline
-        run_benchmark_loop(
-            model_specifications=model_specs,
-            model_runs=2,
-            baseline_model_runs=2,
-            output_dir=output_dir,
-            verbose=0,
-        )
