Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,58 @@ Use filesystem navigation tools to explore the codebase structure as needed.
- Add appropriate error handling
- Document public APIs thoroughly

### Field Description Standards
When adding or modifying Field descriptions in stack component configs:

#### Template Structure
```
{Purpose statement}. {Valid values/format}. {Example(s)}. {Additional context if needed}.
```

#### Core Requirements
1. **Purpose**: Clearly state what the field controls or does
2. **Format**: Specify expected value format (URL, path, enum, etc.)
3. **Examples**: Provide at least one concrete example
4. **Constraints**: Include any limitations or requirements

#### Quality Standards
- Minimum 30 characters
- Use action words (controls, configures, specifies, determines)
- Include concrete examples with realistic values
- Avoid vague language ("thing", "stuff", "value", "setting")
- Don't begin descriptions with "The" and don't end them with a trailing period
- Be specific about valid formats and constraints

#### Example Field Descriptions
```python
# Good examples:
instance_type: Optional[str] = Field(
None,
description="AWS EC2 instance type for step execution. Must be a valid "
"SageMaker-supported instance type. Examples: 'ml.t3.medium' (2 vCPU, 4GB RAM), "
"'ml.m5.xlarge' (4 vCPU, 16GB RAM). Defaults to ml.m5.xlarge for training steps"
)

path: str = Field(
description="Root path for artifact storage. Must be a valid URI supported by the "
"artifact store implementation. Examples: 's3://my-bucket/artifacts', "
"'/local/storage/path', 'gs://bucket-name/zenml-artifacts'. Path must be accessible "
"with configured credentials"
)

synchronous: bool = Field(
True,
description="Controls whether pipeline execution blocks the client. If True, "
"the client waits until all steps complete. If False, returns immediately and "
"executes asynchronously. Useful for long-running production pipelines"
)
```

#### Validation
- Run `python scripts/validate_descriptions.py` to check description quality
- All descriptions must pass validation before merging
- Add validation to CI pipeline to prevent regressions

### When Fixing Bugs
- Add regression tests that would have caught the bug
- Understand root cause before implementing fix
Expand Down
147 changes: 146 additions & 1 deletion docs/mkdocstrings_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import argparse
import ast
import os
import subprocess
from pathlib import Path
from typing import List, Optional
from typing import List, Optional, Tuple
import re

PYDOCSTYLE_CMD = (
"pydocstyle --convention=google --add-ignore=D100,D101,D102,"
Expand Down Expand Up @@ -66,6 +68,147 @@ def generate_title(s: str) -> str:
return s


def extract_field_description_from_code(code: str, field_name: str) -> Optional[str]:
    """Extract a Pydantic ``Field(description=...)`` value from Python source.

    Args:
        code: Python source text to scan.
        field_name: Name of the annotated class attribute to look for.

    Returns:
        The literal description string of the first matching
        ``field_name: T = Field(..., description="...")`` assignment, or
        ``None`` if the field, the ``Field()`` call, or a constant
        ``description`` keyword is not found — including when *code* is not
        parseable Python.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ast.parse raises SyntaxError for invalid source and ValueError for
        # source containing null bytes.  Treat both as "no description found"
        # instead of the original bare `except:`, which also hid real bugs
        # (AttributeError, KeyboardInterrupt, ...).
        return None

    for node in ast.walk(tree):
        if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)):
            continue
        if node.target.id != field_name or not isinstance(node.value, ast.Call):
            continue
        # Accept both `Field(...)` and `pydantic.Field(...)` call forms.
        func = node.value.func
        is_field_call = (
            (isinstance(func, ast.Name) and func.id == "Field")
            or (isinstance(func, ast.Attribute) and func.attr == "Field")
        )
        if not is_field_call:
            continue
        for keyword in node.value.keywords:
            if keyword.arg == "description" and isinstance(keyword.value, ast.Constant):
                return keyword.value.value
    return None


def generate_docstring_attributes_from_fields(file_path: Path) -> None:
    """Add a Google-style ``Attributes:`` section to class docstrings, built
    from the class's Pydantic ``Field(description=...)`` declarations.

    The file is rewritten in place.  Classes whose docstring already contains
    an ``Attributes:`` section, classes without a docstring, and files that do
    not import pydantic are left untouched.  Any processing error is reported
    as a warning and the file is left unmodified.

    Args:
        file_path: Path to the Python source file to process.
    """
    if not file_path.exists() or not file_path.name.endswith('.py'):
        return

    try:
        content = file_path.read_text(encoding='utf-8')

        # Skip files that cannot contain pydantic Field definitions.
        if 'from pydantic import' not in content and 'import pydantic' not in content:
            return

        tree = ast.parse(content)

        # First pass: collect (class start line, {field: description}) for
        # every class that declares Field(...) attributes with a constant
        # description keyword.
        targets = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue
            field_descriptions = {}
            for item in node.body:
                if not (isinstance(item, ast.AnnAssign)
                        and isinstance(item.target, ast.Name)
                        and isinstance(item.value, ast.Call)):
                    continue
                func = item.value.func
                # Accept both `Field(...)` and `pydantic.Field(...)`.
                is_field_call = (
                    (isinstance(func, ast.Name) and func.id == "Field")
                    or (isinstance(func, ast.Attribute) and func.attr == "Field")
                )
                if not is_field_call:
                    continue
                for keyword in item.value.keywords:
                    if keyword.arg == "description" and isinstance(keyword.value, ast.Constant):
                        field_descriptions[item.target.id] = keyword.value.value
            if field_descriptions:
                targets.append((node.lineno, field_descriptions))

        lines = content.split('\n')
        modified = False

        # Second pass: rewrite docstrings bottom-up.  Replacing a docstring
        # can change the number of lines in the file, which would invalidate
        # the AST line numbers of every class *after* the edit — processing
        # in reverse line order keeps all remaining line numbers valid.
        for class_lineno, field_descriptions in sorted(targets, reverse=True):
            docstring_start, docstring_end = find_class_docstring_range(
                lines, class_lineno - 1
            )
            if docstring_start is None or docstring_end is None:
                continue

            existing_docstring = '\n'.join(lines[docstring_start:docstring_end + 1])
            # Don't duplicate an Attributes section that already exists.
            if 'Attributes:' in existing_docstring:
                continue

            stripped = existing_docstring.rstrip()
            # Remove the closing quotes as a *suffix*.  The original used
            # str.rstrip('"""'), which strips trailing quote characters and
            # therefore also eats any quotes belonging to the docstring text.
            if stripped.endswith('"""'):
                quote = '"""'
            elif stripped.endswith("'''"):
                quote = "'''"
            else:
                continue

            attributes_section = generate_attributes_section(field_descriptions)
            body = stripped[:-len(quote)].rstrip()
            new_docstring = (
                body + '\n\n' + attributes_section + '\n    ' + quote
            )
            lines[docstring_start:docstring_end + 1] = new_docstring.split('\n')
            modified = True

        if modified:
            file_path.write_text('\n'.join(lines), encoding='utf-8')

    except Exception as e:
        # Best-effort tool: never abort the docs build over one bad file.
        print(f"Warning: Could not process {file_path}: {e}")


def find_class_docstring_range(lines: List[str], class_line: int) -> Tuple[Optional[int], Optional[int]]:
    """Locate the docstring of the class defined at ``lines[class_line]``.

    Only the nine lines following the ``class`` statement are searched for an
    opening triple quote (single or double).

    Args:
        lines: The file content split into individual lines.
        class_line: Zero-based index of the ``class`` statement line.

    Returns:
        A ``(start, end)`` pair of zero-based, inclusive line indices of the
        docstring, or ``(None, None)`` when no docstring is found.
    """
    window_end = min(class_line + 10, len(lines))
    for idx in range(class_line + 1, window_end):
        candidate = lines[idx].strip()
        if not candidate.startswith(('"""', "'''")):
            continue
        delimiter = candidate[:3]
        # Opening and closing quotes on the same line: one-line docstring.
        if candidate.count(delimiter) >= 2:
            return idx, idx
        # Multi-line docstring: walk forward until the closing quote appears.
        for closing in range(idx + 1, len(lines)):
            if delimiter in lines[closing]:
                return idx, closing
        # Unterminated docstring candidate: keep scanning the window.
    return None, None


def generate_attributes_section(field_descriptions: dict) -> str:
    """Render a Google-style ``Attributes:`` docstring section.

    Args:
        field_descriptions: Mapping of field name to its description text.

    Returns:
        The section as a single string indented for a class docstring, with
        each description collapsed onto one line (runs of whitespace and
        line breaks become single spaces).
    """
    entries = [
        f"        {name}: {' '.join(text.split())}"
        for name, text in field_descriptions.items()
    ]
    return '\n'.join(["    Attributes:", *entries])


def process_pydantic_files_in_directory(directory: Path) -> None:
    """Rewrite docstrings for every eligible Python file under *directory*.

    Recursively walks the tree and delegates each ``.py`` file to
    ``generate_docstring_attributes_from_fields``.  Files inside
    ``__pycache__`` directories and private modules (file names starting
    with an underscore, e.g. ``__init__.py``) are skipped.

    Args:
        directory: Root directory to scan; a nonexistent path is a no-op.
    """
    if not directory.exists():
        return

    print(f"Processing Pydantic files in {directory}...")

    for candidate in directory.rglob("*.py"):
        if "__pycache__" in str(candidate) or candidate.name.startswith("_"):
            continue
        generate_docstring_attributes_from_fields(candidate)


def create_entity_docs(
api_doc_file_dir: Path,
ignored_modules: List[str],
Expand Down Expand Up @@ -164,6 +307,8 @@ def generate_docs(
ignored_modules: A list of modules that should be ignored.
validate: Boolean if pydocstyle should be verified within dir
"""
# First, process all Pydantic files to generate docstring attributes
process_pydantic_files_in_directory(path)
# Set up output paths for the generated md files
api_doc_file_dir = output_path / API_DOCS
cli_dev_doc_file_dir = output_path / API_DOCS / "cli"
Expand Down
15 changes: 12 additions & 3 deletions src/zenml/artifact_stores/base_artifact_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
cast,
)

from pydantic import model_validator
from pydantic import Field, model_validator

from zenml.constants import (
ENV_ZENML_SERVER,
Expand Down Expand Up @@ -187,9 +187,18 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any:


class BaseArtifactStoreConfig(StackComponentConfig):
"""Config class for `BaseArtifactStore`."""
"""Config class for `BaseArtifactStore`.

path: str
Base configuration for artifact storage backends.
Field descriptions are defined inline using Field() descriptors.
"""

path: str = Field(
description="Root path for artifact storage. Must be a valid URI supported by the "
"specific artifact store implementation. Examples: 's3://my-bucket/artifacts', "
"'/local/storage/path', 'gs://bucket-name/zenml-artifacts', 'azure://container/path'. "
"Path must be accessible with the configured credentials and permissions"
)

SUPPORTED_SCHEMES: ClassVar[Set[str]]
IS_IMMUTABLE_FILESYSTEM: ClassVar[bool] = False
Expand Down
22 changes: 17 additions & 5 deletions src/zenml/container_registries/base_container_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import re
from typing import TYPE_CHECKING, Optional, Tuple, Type, cast

from pydantic import field_validator
from pydantic import Field, field_validator

from zenml.constants import DOCKER_REGISTRY_RESOURCE_TYPE
from zenml.enums import StackComponentType
Expand All @@ -36,12 +36,24 @@
class BaseContainerRegistryConfig(AuthenticationConfigMixin):
"""Base config for a container registry.

Attributes:
uri: The URI of the container registry.
Configuration for connecting to container image registries.
Field descriptions are defined inline using Field() descriptors.
"""

uri: str
default_repository: Optional[str] = None
uri: str = Field(
description="Container registry URI (e.g., 'gcr.io' for Google Container "
"Registry, 'docker.io' for Docker Hub, 'registry.gitlab.com' for GitLab "
"Container Registry, 'ghcr.io' for GitHub Container Registry). This is "
"the base URL where container images will be pushed to and pulled from."
)
default_repository: Optional[str] = Field(
default=None,
description="Default repository namespace for image storage (e.g., "
"'username' for Docker Hub, 'project-id' for GCR, 'organization' for "
"GitHub Container Registry). If not specified, images will be stored at "
"the registry root. For Docker Hub this would mean only official images "
"can be pushed.",
)

@field_validator("uri")
@classmethod
Expand Down
Loading