fix(sdk): unify behavior between cli and sdk to auto save results (#272)

nina-xu · web-flow · commit 8e665ca83cc8 · 2026-03-20T14:49:16.000-04:00
# Summary  This PR fixes row 20 from the [bug bash feedback](https://docs.google.com/spreadsheets/d/1fmFYl89hUCNMROR3B_hnq4oqaqM8tAQJJc7p2P17ep4/edit?gid=0#gid=0). Previously, the CLI called `.save_results()` automatically, while the SDK required `.save_results()` to be called explicitly. With this change, `.run()` calls `.save_results()` itself, so both the CLI and the SDK save results automatically. ## Pre-Review Checklist   Ensure that the following pass: - [x] `make format && make check` or via prek validation. - [x] `make test` passes locally - [x] `make test-e2e` passes locally - [ ] `make test-ci-container` passes locally (recommended) - [ ] GPU CI status check passes -- comment `/sync` on this PR to trigger a run (auto-triggers on ready-for-review) ## Pre-Merge Checklist    - [x] New or updated tests for any fix or new behavior - [x] Updated documentation for new features and behaviors, including docstrings for API docs. ## Testing - CLI - default output path: `safe-synthesizer run --config /root/configs/quick-tinyllama-unsloth.yaml --data-source /root/datasets/clinc_oos.csv`. 
Everything's there: <img width="298" height="295" alt="image" src="https://github.com/user-attachments/assets/b02547b1-a98b-4cff-b2be-dc7ced8bda66" /> - output file override: `safe-synthesizer run --config /root/configs/quick-tinyllama-unsloth.yaml --data-source /root/datasets/clinc_oos.csv --output-file /root/Safe-Synthesizer/safe-synthesizer-artifacts/output-path-override/synth.csv`: everything else is still in the default path, except for the generated csv <img width="300" height="402" alt="image" src="https://github.com/user-attachments/assets/1a710b64-dab0-4592-9c4d-1e96eed2a85d" /> - SDK - ran the 101 notebook: ``` from nemo_safe_synthesizer.sdk.library_builder import SafeSynthesizer builder = SafeSynthesizer().with_data_source(df).with_replace_pii(enable=False).with_train(num_input_records_to_sample=1000).resolve() builder.run() results = builder.results ``` everything's there <img width="293" height="331" alt="image" src="https://github.com/user-attachments/assets/6b1b1b4b-a00a-4366-9282-da6ad2e0230e" /> --------- Signed-off-by: nina-xu <19981858+nina-xu@users.noreply.github.com>
diff --git a/STYLE_GUIDE.md b/STYLE_GUIDE.md
@@ -357,11 +357,10 @@ Standard context managers (`with open(...)`, `with lock:`) are fine when they fi
 
 ```python
 try:
-    ss.run()
-    ss.save_results(workdir)
+    nss.run()  # saves results automatically
 finally:
-    if hasattr(ss, "generator") and ss.generator is not None:
-        ss.generator.teardown()
+    if hasattr(nss, "generator") and nss.generator is not None:
+        nss.generator.teardown()
 ```
 
 For backends with expensive resources, use the `_torn_down` guard pattern:
diff --git a/docs/user-guide/running.md b/docs/user-guide/running.md
@@ -960,10 +960,11 @@ the CLI Commands section for all options.
         Path("./safe-synthesizer-artifacts/myconfig---mydata/2026-01-15T12:00:00")
     )
     synthesizer = SafeSynthesizer(config, workdir=workdir)
-    synthesizer.process_data()
     synthesizer.load_from_save_path()
+    synthesizer.process_data()
     synthesizer.generate()
     synthesizer.evaluate()
+    synthesizer.save_results()
     ```
 
 ### Stepwise execution (SDK)
@@ -1031,11 +1032,15 @@ Key outputs:
 
 ### SDK Results Access
 
+`run()` automatically saves `synthetic_data.csv` and `evaluation_report.html`
+to the artifacts directory; an `output_file` override redirects only the CSV.
+For stepwise execution, call `save_results()` explicitly after `evaluate()`.
+
 ```python
 results = synthesizer.results
 df = results.synthetic_data
 summary = results.summary
-synthesizer.save_results()
+# synthesizer.save_results()  # only needed for stepwise execution; run() saves automatically
 ```
 
 ### Cleaning Up
diff --git a/src/nemo_safe_synthesizer/cli/run.py b/src/nemo_safe_synthesizer/cli/run.py
@@ -218,19 +218,18 @@ def run(
     with traced_user("SafeSynthesizer"):
         from ..sdk.library_builder import SafeSynthesizer
 
-        ss: SafeSynthesizer = SafeSynthesizer(config=config, workdir=workdir).with_data_source(df)
-        # ss.run() calls train + generate + evaluate. The generate step has its own try/finally,
-        # but train or evaluate failures leave the generator loaded; this guard ensures teardown
-        # on all exit paths of the full pipeline.
+        nss: SafeSynthesizer = SafeSynthesizer(config=config, workdir=workdir).with_data_source(df)
+        # nss.run() calls train + generate + evaluate + save_results. The generate step has its
+        # own try/finally, but train or evaluate failures leave the generator loaded; this guard
+        # ensures teardown on all exit paths of the full pipeline.
         try:
-            ss.run()
-            ss.save_results(output_file=settings.output_file or workdir.output_file)
-            ss.results.summary.log_summary(run_logger)
-            ss.results.summary.timing.log_timing(run_logger)
-            ss.results.summary.log_wandb()
+            nss.run(output_file=settings.output_file)
+            nss.results.summary.log_summary(run_logger)
+            nss.results.summary.timing.log_timing(run_logger)
+            nss.results.summary.log_wandb()
         finally:
-            if hasattr(ss, "generator") and ss.generator is not None:
-                ss.generator.teardown()
+            if hasattr(nss, "generator") and nss.generator is not None:
+                nss.generator.teardown()
 
 
 @run.command("train")
@@ -359,25 +358,25 @@ def run_generate(
 
     final_output_file = settings.output_file or workdir.output_file
     with traced_user("SafeSynthesizer"):
-        ss = SafeSynthesizer(config, workdir=workdir)
+        nss = SafeSynthesizer(config, workdir=workdir)
 
         # Only set data source if provided via --data-source
         # Otherwise, load_from_save_path() will load from cached files
         if df is not None:
-            ss = ss.with_data_source(df)
+            nss = nss.with_data_source(df)
 
         try:
-            ss = (
-                ss.load_from_save_path()
+            nss = (
+                nss.load_from_save_path()
                 .process_data()
                 .generate()
                 .evaluate()
                 .save_results(output_file=final_output_file)
             )
-            ss.results.summary.log_summary(run_logger)
-            ss.results.summary.timing.log_timing(run_logger)
+            nss.results.summary.log_summary(run_logger)
+            nss.results.summary.timing.log_timing(run_logger)
             run_logger.info(f"Generation complete. Results saved to: {final_output_file}")
-            ss.results.summary.log_wandb()
+            nss.results.summary.log_wandb()
         finally:
-            if hasattr(ss, "generator") and ss.generator is not None:
-                ss.generator.teardown()
+            if hasattr(nss, "generator") and nss.generator is not None:
+                nss.generator.teardown()
diff --git a/src/nemo_safe_synthesizer/sdk/AGENTS.md b/src/nemo_safe_synthesizer/sdk/AGENTS.md
@@ -45,7 +45,7 @@ Precedence: `kwargs` override `values`; `values` override model defaults. Each `
 - generate(): Chooses `TimeseriesBackend` or `VllmBackend`, initializes, generates.
 - evaluate(): Builds `Evaluator`, compiles `results` via `make_nss_results`.
 
-`run()` calls `process_data().train().generate().evaluate()`.
+`run()` calls `process_data().train().generate().evaluate()` then `save_results()`. Stepwise callers must invoke `save_results()` themselves.
 
 ## Gotchas
 
diff --git a/src/nemo_safe_synthesizer/sdk/library_builder.py b/src/nemo_safe_synthesizer/sdk/library_builder.py
@@ -91,6 +91,7 @@ class SafeSynthesizer(ConfigBuilder):
 
         builder = SafeSynthesizer().with_data_source(df)
         builder.process_data().train().generate().evaluate()
+        builder.save_results()
         results = builder.results
 
     Args:
@@ -439,10 +440,16 @@ def evaluate(self) -> SafeSynthesizer:
         )
         return self
 
-    def run(self) -> None:
-        """Run the full pipeline: ``process_data`` -> ``train`` -> ``generate`` -> ``evaluate``.
+    def run(self, output_file: Path | str | None = None) -> None:
+        """Run the full pipeline and save results.
 
-        For step-by-step control, call the individual methods instead.
+        Executes ``process_data`` -> ``train`` -> ``generate`` ->
+        ``evaluate`` -> ``save_results``.  For step-by-step control,
+        call the individual methods instead.
+
+        Args:
+            output_file: Explicit output path for the synthetic data CSV.
+                Falls back to ``workdir.output_file`` when ``None``.
 
         Raises:
             RuntimeError: If called after ``load_from_save_path()``.
@@ -460,11 +467,15 @@ def run(self) -> None:
             assert isinstance(self._data_source, pd.DataFrame)
 
         self.process_data().train().generate().evaluate()
+        self.save_results(output_file=output_file)
 
     @traced("SafeSynthesizer.save_results", category=LogCategory.RUNTIME, level="INFO")
     def save_results(self, output_file: Path | str | None = None) -> None:
         """Save synthetic data CSV and evaluation report HTML to the workdir.
 
+        Called automatically by ``run()``.  Call explicitly after
+        stepwise execution (``process_data().train().generate().evaluate()``).
+
         Args:
             output_file: Explicit output path for the CSV.  Falls back
                 to ``workdir.output_file`` when ``None``.
@@ -473,7 +484,6 @@ def save_results(self, output_file: Path | str | None = None) -> None:
             assert self.results is not None
             assert isinstance(self.results.synthetic_data, pd.DataFrame)
 
-        # Determine output file path for synthetic data
         match output_file:
             case Path() as p:
                 output_file = p
@@ -482,12 +492,10 @@ def save_results(self, output_file: Path | str | None = None) -> None:
             case _:
                 output_file = self._workdir.output_file
 
-        # Save synthetic data CSV
         output_file.parent.mkdir(parents=True, exist_ok=True)
         self.results.synthetic_data.to_csv(str(output_file), index=False)
         logger.info(f"Saved synthetic data to {output_file}")
 
-        # Save evaluation report HTML if available
         if self.results.evaluation_report_html:
             report_path = self._workdir.evaluation_report
             report_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/tests/cli/test_run.py b/tests/cli/test_run.py
@@ -131,7 +131,7 @@ def test_run_uses_custom_output_file(
         fixture_session_cache_dir: Path,
         patched_run_dependencies: dict,
     ):
-        """Verify that --output-file overrides default workdir output."""
+        """Verify that --output-file is forwarded to run()."""
         custom_output = tmp_path / "custom_output.csv"
 
         result = cli_runner.invoke(
@@ -147,22 +147,18 @@ def test_run_uses_custom_output_file(
             catch_exceptions=False,
         )
 
-        # Verify save_results was called with the custom output file
         assert result.exit_code == 0
         mock_ss = patched_run_dependencies["safe_synthesizer"]
-        mock_ss.save_results.assert_called_once()
-        actual_output_path = mock_ss.save_results.call_args.kwargs.get("output_file")
-        assert str(actual_output_path) == str(custom_output)
+        mock_ss.run.assert_called_once_with(output_file=str(custom_output))
 
-    def test_run_uses_workdir_output_when_no_override(
+    def test_run_without_output_file_passes_none(
         self,
         cli_runner: CliRunner,
         dummy_csv: Path,
         fixture_session_cache_dir: Path,
-        mock_workdir: MagicMock,
         patched_run_dependencies: dict,
     ):
-        """Verify that workdir.output_file is used when --output-file is not provided."""
+        """Without --output-file, run() is called with output_file=None."""
         result = cli_runner.invoke(
             run,
             [
@@ -174,12 +170,10 @@ def test_run_uses_workdir_output_when_no_override(
             catch_exceptions=False,
         )
 
-        # Verify save_results was called with the workdir's default output file
         assert result.exit_code == 0
         mock_ss = patched_run_dependencies["safe_synthesizer"]
-        mock_ss.save_results.assert_called_once()
-        actual_output_path = mock_ss.save_results.call_args.kwargs.get("output_file")
-        assert str(actual_output_path) == str(mock_workdir.output_file)
+        # Default output path is used if no --output-file is provided
+        mock_ss.run.assert_called_once_with(output_file=None)
 
 
 class TestPathOptions:
diff --git a/tests/sdk/test_builder.py b/tests/sdk/test_builder.py
@@ -1,6 +1,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+from pathlib import Path
+from unittest.mock import MagicMock
+
 import pandas as pd
 import pytest
 
@@ -12,6 +15,7 @@
 from nemo_safe_synthesizer.sdk.library_builder import SafeSynthesizer
 
 _SMALL_DF = pd.DataFrame({"a": [1, 2, 3]})
+_REPORT_HTML = "<html><body>report</body></html>"
 
 PATCH_PREFIX = "nemo_safe_synthesizer.sdk.builder"
 
@@ -355,3 +359,49 @@ def test_with_replace_pii_reenable_after_disable():
         ._nss_config
     )
     assert config.replace_pii is not None
+
+
+def _builder_with_mock_results(tmp_path: Path) -> SafeSynthesizer:
+    """Create a SafeSynthesizer with mocked results for save_results testing."""
+    nss = SafeSynthesizer(save_path=tmp_path / "artifacts")
+    nss.results = MagicMock()
+    nss.results.synthetic_data = _SMALL_DF
+    nss.results.evaluation_report_html = _REPORT_HTML
+    return nss
+
+
+class TestSaveResults:
+    """Verify save_results persists CSV and HTML to the expected paths."""
+
+    def test_saves_to_default_workdir(self, tmp_path: Path):
+        nss = _builder_with_mock_results(tmp_path)
+
+        nss.save_results()
+
+        csv_path = nss._workdir.output_file
+        report_path = nss._workdir.evaluation_report
+        assert csv_path.exists()
+        assert report_path.exists()
+        assert pd.read_csv(csv_path).equals(_SMALL_DF)
+        assert report_path.read_text() == _REPORT_HTML
+
+    def test_output_file_override_writes_csv_to_custom_path(self, tmp_path: Path):
+        nss = _builder_with_mock_results(tmp_path)
+        custom_csv = tmp_path / "custom" / "output.csv"
+
+        nss.save_results(output_file=custom_csv)
+
+        assert custom_csv.exists()
+        assert pd.read_csv(custom_csv).equals(_SMALL_DF)
+        # Report still goes to the workdir regardless of output_file
+        assert nss._workdir.evaluation_report.exists()
+        assert nss._workdir.evaluation_report.read_text() == _REPORT_HTML
+
+    def test_skips_report_when_html_is_none(self, tmp_path: Path):
+        nss = _builder_with_mock_results(tmp_path)
+        nss.results.evaluation_report_html = None
+
+        nss.save_results()
+
+        assert nss._workdir.output_file.exists()
+        assert not nss._workdir.evaluation_report.exists()