Skip to content

Commit 847e974

Browse files
docs: Update docstrings in src/cli/ (#154)
Summary: Add and update docstrings in `src/cli/` based on STYLE_GUIDE.md.
---
Made with [Cursor](https://cursor.com/)
Signed-off-by: Kendrick Boyd <kendrickb@nvidia.com>
1 parent a93c5cf commit 847e974

9 files changed

Lines changed: 279 additions & 140 deletions

File tree

src/nemo_safe_synthesizer/cli/artifact_structure.py

Lines changed: 107 additions & 76 deletions
Large diffs are not rendered by default.

src/nemo_safe_synthesizer/cli/artifacts.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
"""CLI entry points for artifact management.
5+
6+
Provides CLI commands for inspecting and cleaning up artifact directories
7+
produced by Safe Synthesizer runs.
8+
"""
9+
410
from __future__ import annotations
511

612
import shutil
@@ -14,7 +20,7 @@
1420
@click.group(invoke_without_command=True)
1521
@click.pass_context
1622
def artifacts(ctx: click.Context):
17-
"""Artifacts management commands."""
23+
"""Artifact management commands."""
1824
pass
1925

2026

src/nemo_safe_synthesizer/cli/cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
"""Top-level CLI group for Safe Synthesizer.
5+
6+
Assembles the ``config``, ``run``, and ``artifacts`` subcommand groups
7+
into a single Python Click entry point.
8+
"""
9+
410
from __future__ import annotations
511

612
import click

src/nemo_safe_synthesizer/cli/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
"""CLI entry points for configuration management.
5+
6+
Each command loads or creates a ``SafeSynthesizerParameters`` model, optionally applies
7+
CLI overrides, and either validates, prints, or writes the result.
8+
"""
9+
410
from __future__ import annotations
511

612
import click

src/nemo_safe_synthesizer/cli/datasets.py

Lines changed: 72 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,29 @@
2424
class DatasetInfo(BaseModel):
2525
"""Entry in the dataset registry."""
2626

27-
name: str
28-
"""Short name of the dataset.
29-
30-
Used to fetch the dataset from the registry.
31-
"""
32-
33-
url: str
34-
"""URL or path to the dataset.
35-
36-
If a relative path, it is joined with the base_url from the registry if
37-
present.
38-
"""
39-
40-
overrides: dict[str, Any] | None = None
41-
"""Config overrides for this dataset.
42-
43-
These overrides take precendence over the values from the config file in
44-
the CLI, but are themselves overridden by any CLI args specifying config
45-
parameters.
46-
"""
47-
48-
load_args: dict[str, Any] | None = None
49-
"""Extra arguments needed by the data reader for a this dataset."""
27+
name: str = Field(description=("Short name of the dataset. Used to fetch the dataset from the registry by name."))
28+
29+
url: str = Field(
30+
description=(
31+
"URL or path to the dataset. "
32+
"If a relative path, it is joined with the base_url from the registry if present."
33+
)
34+
)
35+
36+
overrides: dict[str, Any] | None = Field(
37+
default=None,
38+
description=(
39+
"Config overrides for this dataset. "
40+
"These overrides take precedence over the values from the config file in "
41+
"the CLI, but are themselves overridden by any CLI args specifying config "
42+
"parameters."
43+
),
44+
)
45+
46+
load_args: dict[str, Any] | None = Field(
47+
default=None,
48+
description="Extra arguments needed by the data reader for this dataset.",
49+
)
5050

5151
_registry: DatasetRegistry | None = None
5252
"""Private attribute to keep a reference to an associated registry.
@@ -81,7 +81,16 @@ def get_url(self) -> str:
8181
return self.url
8282

8383
def fetch(self) -> pd.DataFrame:
84-
"""Fetch the dataset and return a pandas DataFrame."""
84+
"""Fetch the dataset and return a pandas DataFrame.
85+
86+
Infers the file format from the URL extension and merges any ``load_args`` on top of per-format defaults.
87+
88+
Returns:
89+
The dataset as a DataFrame.
90+
91+
Raises:
92+
ValueError: If the file extension is not supported.
93+
"""
8594
url = self.get_url()
8695

8796
logger.info(f"Reading dataset from {url}")
@@ -116,27 +125,45 @@ def fetch(self) -> pd.DataFrame:
116125

117126

118127
class DatasetRegistry(BaseModel):
119-
"""Registry of datasets for easy reference by name."""
120-
121-
datasets: list[DatasetInfo] = Field(default_factory=list)
122-
"""List of datasets in the registry."""
128+
"""Registry of datasets for easy reference by name.
123129
124-
base_url: str | None = None
125-
"""Base URL for the registry.
130+
Datasets can be looked up by name via ``get_dataset``. If the name is
131+
not in the registry, a new ``DatasetInfo`` is created on-the-fly treating
132+
the name as a literal URL or path.
126133
127-
Any relative paths will be prepended with the base_url before attempting to load the dataset.
128-
This only applies to the datasets in the registry which have a relative url.
134+
When constructed, the DatasetRegistry automatically adds back-references to each entry in ``self.datasets`` so the
135+
``DatasetInfo`` instances can resolve ``base_url``.
129136
"""
130137

138+
datasets: list[DatasetInfo] = Field(default_factory=list, description="List of datasets in the registry.")
139+
140+
base_url: str | None = Field(
141+
default=None,
142+
description=(
143+
"Base URL for the registry. Any relative paths will be prepended with the base_url before "
144+
"attempting to load the dataset. This only applies to the datasets in the registry which have "
145+
"a relative url."
146+
),
147+
)
148+
131149
def __init__(self, **data):
132150
super().__init__(**data)
133151
for dataset in self.datasets:
134152
dataset._registry = self
135153

136154
def get_dataset(self, url: str) -> DatasetInfo:
137-
"""Get a dataset from the registry.
155+
"""Look up a dataset by name, creating an ad-hoc entry if not found.
156+
157+
When ``url`` matches a registered name the corresponding entry is
158+
returned. Otherwise a new ``DatasetInfo`` is created with the raw
159+
``url`` as both name and path (without a registry back-reference, so
160+
relative paths resolve against the working directory).
161+
162+
Args:
163+
url: Dataset name, URL, or file path.
138164
139-
Automatically adds a new Dataset with name url if it doesn't exist in the registry.
165+
Returns:
166+
Matching or newly created ``DatasetInfo``.
140167
"""
141168
for dataset in self.datasets:
142169
if dataset.name == url:
@@ -153,7 +180,17 @@ def get_dataset(self, url: str) -> DatasetInfo:
153180

154181
@classmethod
155182
def from_yaml(cls, path: str | Path) -> Self:
156-
"""Load a DatasetRegistry from a YAML file."""
183+
"""Load a ``DatasetRegistry`` from a YAML file.
184+
185+
Args:
186+
path: Path to the YAML file.
187+
188+
Returns:
189+
Parsed registry with back-references set on each dataset.
190+
191+
Raises:
192+
FileNotFoundError: If ``path`` does not exist.
193+
"""
157194
if not Path(path).exists():
158195
raise FileNotFoundError(f"File {path} does not exist")
159196
with open(path, "r") as f:

src/nemo_safe_synthesizer/cli/run.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
"""CLI run commands for Safe Synthesizer.
55
66
This module provides the run command group for the Safe Synthesizer pipeline:
7-
- `run` (default): Full end-to-end pipeline
8-
- `run train`: Training stage only
9-
- `run generate`: Generation stage only (requires trained model)
7+
8+
- ``run`` (default): Full end-to-end pipeline
9+
- ``run train``: Training stage only
10+
- ``run generate``: Generation stage only (requires trained model)
1011
"""
1112

1213
from __future__ import annotations

src/nemo_safe_synthesizer/cli/settings.py

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
(observability, wandb, etc.) into a single pydantic-settings class.
88
99
The CLISettings class:
10+
1011
- Automatically loads from environment variables
1112
- Composes existing settings classes as nested fields
1213
- Provides a single source of truth for all CLI configuration
@@ -58,80 +59,115 @@ class CLISettings(BaseSettings):
5859
env_file_encoding="utf-8",
5960
)
6061

61-
# Compose existing settings (automatically populated from env vars)
62-
# These are NOT loaded from env vars by CLISettings - they load their own
63-
# We use default_factory to create fresh instances that read env vars
64-
observability: NSSObservabilitySettings = Field(default_factory=NSSObservabilitySettings)
65-
wandb: WandbSettings = Field(default_factory=WandbSettings)
62+
observability: NSSObservabilitySettings = Field(
63+
default_factory=NSSObservabilitySettings, description="Observability sub-settings (log level, format, color)."
64+
)
65+
"""Observability sub-settings (log level, format, color).
66+
67+
Loaded from its own environment variables; not populated by ``CLISettings``.
68+
"""
69+
70+
wandb: WandbSettings = Field(default_factory=WandbSettings, description="WandB settings (mode, project, phase).")
71+
"""WandB settings (mode, project, phase).
72+
73+
Loaded from its own environment variables; not populated by ``CLISettings``.
74+
"""
6675

67-
# CLI-specific settings (paths)
68-
# Note: AliasChoices allows both the field name (for CLI kwargs) and env var name to work
6976
url: str | None = Field(default=None, description="Dataset URL, name, or path to CSV")
77+
"""Dataset URL, name, or path to CSV."""
78+
7079
config_path: str | None = Field(
7180
default=None,
7281
validation_alias=AliasChoices("config_path", "NSS_CONFIG"),
7382
description="Path to YAML config file",
7483
)
84+
"""Path to YAML config file (env variable: ``NSS_CONFIG``)."""
85+
7586
artifact_path: str | None = Field(
7687
default=None,
7788
validation_alias=AliasChoices("artifact_path", "NSS_ARTIFACTS_PATH"),
7889
description="Base directory for all runs",
7990
)
91+
"""Base directory for all runs (env variable: ``NSS_ARTIFACTS_PATH``)."""
92+
8093
run_path: str | None = Field(
8194
default=None,
8295
description="Explicit path for this run's output directory",
8396
)
97+
"""Explicit path for this run's output directory.
98+
99+
When specified, overrides ``artifact_path`` and skips the
100+
``<project>/<timestamp>`` directory layout.
101+
"""
102+
84103
output_file: str | None = Field(
85104
default=None,
86105
description="Path to output CSV file",
87106
)
107+
"""Path to output CSV file, overriding the default workdir location."""
88108

89-
# Logging settings (can override observability defaults from CLI)
90109
log_format: Literal["json", "plain"] | None = Field(
91110
default=None,
92111
validation_alias=AliasChoices("log_format", "NSS_LOG_FORMAT"),
93112
description="Log format for console output",
94113
)
114+
"""Log format for console output (env variable: ``NSS_LOG_FORMAT``).
115+
116+
File logging is always JSON regardless of this setting.
117+
"""
118+
95119
log_color: bool | None = Field(
96120
default=None,
97121
description="Whether to colorize console output",
98122
)
123+
"""Whether to colorize console output."""
124+
99125
log_file: str | None = Field(
100126
default=None,
101127
validation_alias=AliasChoices("log_file", "NSS_LOG_FILE"),
102128
description="Path to log file",
103129
)
130+
"""Path to log file (env variable: ``NSS_LOG_FILE``)."""
131+
104132
verbose: int = Field(
105133
default=0,
106134
description="Verbosity level (0=INFO, 1=DEBUG, 2=DEBUG_DEPENDENCIES)",
107135
)
136+
"""Verbosity level (0=INFO, 1=DEBUG, 2=DEBUG_DEPENDENCIES)."""
108137

109-
# WandB settings (can override wandb defaults from CLI)
110138
wandb_mode: WandbMode | None = Field(
111139
default=None,
112140
description="WandB mode override",
113141
)
142+
"""WandB mode override (online, offline, or disabled)."""
143+
114144
wandb_project: str | None = Field(
115145
default=None,
116146
description="WandB project override",
117147
)
148+
"""WandB project name override."""
118149

119-
# Synthesis parameter overrides (populated from --data__*, --training__*, etc.)
120150
synthesis_overrides: dict[str, Any] = Field(
121151
default_factory=dict,
122152
description="Nested dict of SafeSynthesizerParameters overrides from CLI",
123153
)
154+
"""Nested dict of ``SafeSynthesizerParameters`` overrides from CLI.
155+
156+
Populated from ``--data__*``, ``--training__*``, etc. options via
157+
``parse_overrides``.
158+
"""
124159

125160
dataset_registry: str | None = Field(
126161
default=None,
127162
validation_alias=AliasChoices("dataset_registry", "NSS_DATASET_REGISTRY"),
128163
description="URL or path to a dataset registry YAML file",
129164
)
165+
"""URL or path to a dataset registry YAML file (env variable: ``NSS_DATASET_REGISTRY``)."""
130166

131167
@field_validator("wandb_mode", mode="before")
132168
@classmethod
133169
def validate_wandb_mode(cls, v: str | WandbMode | None) -> WandbMode | None:
134-
"""Convert string to WandbMode enum if needed."""
170+
"""Coerce a string to the ``WandbMode`` enum, passing through ``None`` and enum values unchanged."""
135171
if v is None:
136172
return None
137173
if isinstance(v, WandbMode):
@@ -141,7 +177,7 @@ def validate_wandb_mode(cls, v: str | WandbMode | None) -> WandbMode | None:
141177
@field_validator("verbose", mode="before")
142178
@classmethod
143179
def validate_verbose(cls, v: int | str | None) -> int:
144-
"""Ensure verbose is an integer."""
180+
"""Coerce string or None to int, defaulting to 0."""
145181
if v is None:
146182
return 0
147183
if isinstance(v, str):
@@ -167,35 +203,35 @@ def from_cli_kwargs(cls, **kwargs: Any) -> CLISettings:
167203

168204
@property
169205
def effective_artifact_path(self) -> Path:
170-
"""The effective artifact path, using default if not set."""
206+
"""Effective artifact path, falling back to ``DEFAULT_ARTIFACTS_PATH``."""
171207
if self.artifact_path:
172208
return Path(self.artifact_path)
173209
return DEFAULT_ARTIFACTS_PATH
174210

175211
@property
176212
def effective_log_format(self) -> Literal["json", "plain"]:
177-
"""The effective log format, falling back to observability settings."""
213+
"""Effective log format, falling back to observability settings."""
178214
if self.log_format is not None:
179215
return self.log_format
180216
return self.observability.nss_log_format or "plain"
181217

182218
@property
183219
def effective_log_color(self) -> bool:
184-
"""The effective log color setting, falling back to observability settings."""
220+
"""Effective log color setting, falling back to observability settings."""
185221
if self.log_color is not None:
186222
return self.log_color
187223
return self.observability.nss_log_color
188224

189225
@property
190226
def effective_wandb_mode(self) -> WandbMode:
191-
"""The effective wandb mode, falling back to wandb settings."""
227+
"""Effective wandb mode, falling back to wandb settings."""
192228
if self.wandb_mode is not None:
193229
return self.wandb_mode
194230
return self.wandb.wandb_mode
195231

196232
@property
197233
def effective_wandb_project(self) -> str | None:
198-
"""The effective wandb project, falling back to wandb settings."""
234+
"""Effective wandb project, falling back to wandb settings."""
199235
if self.wandb_project is not None:
200236
return self.wandb_project
201237
return self.wandb.wandb_project

0 commit comments

Comments
 (0)