Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,58 @@ Use filesystem navigation tools to explore the codebase structure as needed.
- Add appropriate error handling
- Document public APIs thoroughly

### Field Description Standards
When adding or modifying Field descriptions in stack component configs:

#### Template Structure
```
{Purpose statement}. {Valid values/format}. {Example(s)}. {Additional context if needed}.
```

#### Core Requirements
1. **Purpose**: Clearly state what the field controls or does
2. **Format**: Specify expected value format (URL, path, enum, etc.)
3. **Examples**: Provide at least one concrete example
4. **Constraints**: Include any limitations or requirements

#### Quality Standards
- Minimum 30 characters
- Use action words (controls, configures, specifies, determines)
- Include concrete examples with realistic values
- Avoid vague language ("thing", "stuff", "value", "setting")
- Don't begin descriptions with "The" and don't end them with a trailing period
- Be specific about valid formats and constraints

#### Example Field Descriptions
```python
# Good examples:
instance_type: Optional[str] = Field(
None,
description="AWS EC2 instance type for step execution. Must be a valid "
"SageMaker-supported instance type. Examples: 'ml.t3.medium' (2 vCPU, 4GB RAM), "
"'ml.m5.xlarge' (4 vCPU, 16GB RAM). Defaults to ml.m5.xlarge for training steps"
)

path: str = Field(
description="Root path for artifact storage. Must be a valid URI supported by the "
"artifact store implementation. Examples: 's3://my-bucket/artifacts', "
"'/local/storage/path', 'gs://bucket-name/zenml-artifacts'. Path must be accessible "
"with configured credentials"
)

synchronous: bool = Field(
True,
description="Controls whether pipeline execution blocks the client. If True, "
"the client waits until all steps complete. If False, returns immediately and "
"executes asynchronously. Useful for long-running production pipelines"
)
```

#### Validation
- Run `python scripts/validate_descriptions.py` to check description quality
- All descriptions must pass validation before merging
- Add validation to CI pipeline to prevent regressions

### When Fixing Bugs
- Add regression tests that would have caught the bug
- Understand root cause before implementing fix
Expand Down
147 changes: 146 additions & 1 deletion docs/mkdocstrings_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import argparse
import ast
import os
import subprocess
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple
import re

PYDOCSTYLE_CMD = (
"pydocstyle --convention=google --add-ignore=D100,D101,D102,"
Expand Down Expand Up @@ -66,6 +68,147 @@ def generate_title(s: str) -> str:
return s


def extract_field_description_from_code(code: str, field_name: str) -> Optional[str]:
    """Extract a Pydantic ``Field(description=...)`` value from Python source.

    Args:
        code: Python source text to scan.
        field_name: Name of the annotated class attribute to look for.

    Returns:
        The literal description string of the first matching
        ``field_name: T = Field(..., description="...")`` assignment, or
        ``None`` if the field, the ``Field()`` call, or a constant
        ``description`` keyword is not found — including when *code* is not
        parseable Python.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ast.parse raises SyntaxError for invalid source and ValueError for
        # source containing null bytes.  Treat both as "no description found"
        # instead of the original bare `except:`, which also hid real bugs
        # (AttributeError, KeyboardInterrupt, ...).
        return None

    for node in ast.walk(tree):
        if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)):
            continue
        if node.target.id != field_name or not isinstance(node.value, ast.Call):
            continue
        # Accept both `Field(...)` and `pydantic.Field(...)` call forms.
        func = node.value.func
        is_field_call = (
            (isinstance(func, ast.Name) and func.id == "Field")
            or (isinstance(func, ast.Attribute) and func.attr == "Field")
        )
        if not is_field_call:
            continue
        for keyword in node.value.keywords:
            if keyword.arg == "description" and isinstance(keyword.value, ast.Constant):
                return keyword.value.value
    return None


def generate_docstring_attributes_from_fields(file_path: Path) -> None:
    """Add a Google-style ``Attributes:`` section to class docstrings, built
    from the class's Pydantic ``Field(description=...)`` declarations.

    The file is rewritten in place.  Classes whose docstring already contains
    an ``Attributes:`` section, classes without a docstring, and files that do
    not import pydantic are left untouched.  Any processing error is reported
    as a warning and the file is left unmodified.

    Args:
        file_path: Path to the Python source file to process.
    """
    if not file_path.exists() or not file_path.name.endswith('.py'):
        return

    try:
        content = file_path.read_text(encoding='utf-8')

        # Skip files that cannot contain pydantic Field definitions.
        if 'from pydantic import' not in content and 'import pydantic' not in content:
            return

        tree = ast.parse(content)

        # First pass: collect (class start line, {field: description}) for
        # every class that declares Field(...) attributes with a constant
        # description keyword.
        targets = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue
            field_descriptions = {}
            for item in node.body:
                if not (isinstance(item, ast.AnnAssign)
                        and isinstance(item.target, ast.Name)
                        and isinstance(item.value, ast.Call)):
                    continue
                func = item.value.func
                # Accept both `Field(...)` and `pydantic.Field(...)`.
                is_field_call = (
                    (isinstance(func, ast.Name) and func.id == "Field")
                    or (isinstance(func, ast.Attribute) and func.attr == "Field")
                )
                if not is_field_call:
                    continue
                for keyword in item.value.keywords:
                    if keyword.arg == "description" and isinstance(keyword.value, ast.Constant):
                        field_descriptions[item.target.id] = keyword.value.value
            if field_descriptions:
                targets.append((node.lineno, field_descriptions))

        lines = content.split('\n')
        modified = False

        # Second pass: rewrite docstrings bottom-up.  Replacing a docstring
        # can change the number of lines in the file, which would invalidate
        # the AST line numbers of every class *after* the edit — processing
        # in reverse line order keeps all remaining line numbers valid.
        for class_lineno, field_descriptions in sorted(targets, reverse=True):
            docstring_start, docstring_end = find_class_docstring_range(
                lines, class_lineno - 1
            )
            if docstring_start is None or docstring_end is None:
                continue

            existing_docstring = '\n'.join(lines[docstring_start:docstring_end + 1])
            # Don't duplicate an Attributes section that already exists.
            if 'Attributes:' in existing_docstring:
                continue

            stripped = existing_docstring.rstrip()
            # Remove the closing quotes as a *suffix*.  The original used
            # str.rstrip('"""'), which strips trailing quote characters and
            # therefore also eats any quotes belonging to the docstring text.
            if stripped.endswith('"""'):
                quote = '"""'
            elif stripped.endswith("'''"):
                quote = "'''"
            else:
                continue

            attributes_section = generate_attributes_section(field_descriptions)
            body = stripped[:-len(quote)].rstrip()
            new_docstring = (
                body + '\n\n' + attributes_section + '\n    ' + quote
            )
            lines[docstring_start:docstring_end + 1] = new_docstring.split('\n')
            modified = True

        if modified:
            file_path.write_text('\n'.join(lines), encoding='utf-8')

    except Exception as e:
        # Best-effort tool: never abort the docs build over one bad file.
        print(f"Warning: Could not process {file_path}: {e}")


def find_class_docstring_range(lines: List[str], class_line: int) -> Tuple[Optional[int], Optional[int]]:
    """Locate the docstring of the class defined at ``lines[class_line]``.

    Only the nine lines following the ``class`` statement are searched for an
    opening triple quote (single or double).

    Args:
        lines: The file content split into individual lines.
        class_line: Zero-based index of the ``class`` statement line.

    Returns:
        A ``(start, end)`` pair of zero-based, inclusive line indices of the
        docstring, or ``(None, None)`` when no docstring is found.
    """
    window_end = min(class_line + 10, len(lines))
    for idx in range(class_line + 1, window_end):
        candidate = lines[idx].strip()
        if not candidate.startswith(('"""', "'''")):
            continue
        delimiter = candidate[:3]
        # Opening and closing quotes on the same line: one-line docstring.
        if candidate.count(delimiter) >= 2:
            return idx, idx
        # Multi-line docstring: walk forward until the closing quote appears.
        for closing in range(idx + 1, len(lines)):
            if delimiter in lines[closing]:
                return idx, closing
        # Unterminated docstring candidate: keep scanning the window.
    return None, None


def generate_attributes_section(field_descriptions: dict) -> str:
    """Render a Google-style ``Attributes:`` docstring section.

    Args:
        field_descriptions: Mapping of field name to its description text.

    Returns:
        The section as a single string indented for a class docstring, with
        each description collapsed onto one line (runs of whitespace and
        line breaks become single spaces).
    """
    entries = [
        f"        {name}: {' '.join(text.split())}"
        for name, text in field_descriptions.items()
    ]
    return '\n'.join(["    Attributes:", *entries])


def process_pydantic_files_in_directory(directory: Path) -> None:
    """Rewrite docstrings for every eligible Python file under *directory*.

    Recursively walks the tree and delegates each ``.py`` file to
    ``generate_docstring_attributes_from_fields``.  Files inside
    ``__pycache__`` directories and private modules (file names starting
    with an underscore, e.g. ``__init__.py``) are skipped.

    Args:
        directory: Root directory to scan; a nonexistent path is a no-op.
    """
    if not directory.exists():
        return

    print(f"Processing Pydantic files in {directory}...")

    for candidate in directory.rglob("*.py"):
        if "__pycache__" in str(candidate) or candidate.name.startswith("_"):
            continue
        generate_docstring_attributes_from_fields(candidate)


def create_entity_docs(
api_doc_file_dir: Path,
ignored_modules: List[str],
Expand Down Expand Up @@ -164,6 +307,8 @@ def generate_docs(
ignored_modules: A list of modules that should be ignored.
validate: Boolean if pydocstyle should be verified within dir
"""
# First, process all Pydantic files to generate docstring attributes
process_pydantic_files_in_directory(path)
# Set up output paths for the generated md files
api_doc_file_dir = output_path / API_DOCS
cli_dev_doc_file_dir = output_path / API_DOCS / "cli"
Expand Down
15 changes: 12 additions & 3 deletions src/zenml/artifact_stores/base_artifact_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
cast,
)

from pydantic import model_validator
from pydantic import Field, model_validator

from zenml.constants import (
ENV_ZENML_SERVER,
Expand Down Expand Up @@ -187,9 +187,18 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any:


class BaseArtifactStoreConfig(StackComponentConfig):
"""Config class for `BaseArtifactStore`."""
"""Config class for `BaseArtifactStore`.

path: str
Base configuration for artifact storage backends.
Field descriptions are defined inline using Field() descriptors.
"""

path: str = Field(
description="Root path for artifact storage. Must be a valid URI supported by the "
"specific artifact store implementation. Examples: 's3://my-bucket/artifacts', "
"'/local/storage/path', 'gs://bucket-name/zenml-artifacts', 'azure://container/path'. "
"Path must be accessible with the configured credentials and permissions"
)

SUPPORTED_SCHEMES: ClassVar[Set[str]]
IS_IMMUTABLE_FILESYSTEM: ClassVar[bool] = False
Expand Down
22 changes: 17 additions & 5 deletions src/zenml/container_registries/base_container_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import re
from typing import TYPE_CHECKING, Optional, Tuple, Type, cast

from pydantic import field_validator
from pydantic import Field, field_validator

from zenml.constants import DOCKER_REGISTRY_RESOURCE_TYPE
from zenml.enums import StackComponentType
Expand All @@ -36,12 +36,24 @@
class BaseContainerRegistryConfig(AuthenticationConfigMixin):
"""Base config for a container registry.

Attributes:
uri: The URI of the container registry.
Configuration for connecting to container image registries.
Field descriptions are defined inline using Field() descriptors.
"""

uri: str
default_repository: Optional[str] = None
uri: str = Field(
description="Container registry URI (e.g., 'gcr.io' for Google Container "
"Registry, 'docker.io' for Docker Hub, 'registry.gitlab.com' for GitLab "
"Container Registry, 'ghcr.io' for GitHub Container Registry). This is "
"the base URL where container images will be pushed to and pulled from."
)
default_repository: Optional[str] = Field(
default=None,
description="Default repository namespace for image storage (e.g., "
"'username' for Docker Hub, 'project-id' for GCR, 'organization' for "
"GitHub Container Registry). If not specified, images will be stored at "
"the registry root. For Docker Hub this would mean only official images "
"can be pushed.",
)

@field_validator("uri")
@classmethod
Expand Down
Loading