|
| 1 | +"""Validate file paths referenced in bernstein.yaml configuration. |
| 2 | +
|
| 3 | +This module provides startup validation for config-referenced paths: |
| 4 | +context_files, agent_catalog, and other filesystem paths. If any path |
| 5 | +does not exist or is of the wrong kind, validation fails with a clear message before the run starts. |
| 6 | +""" |
| 7 | + |
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import logging |
| 11 | +from dataclasses import dataclass |
| 12 | +from pathlib import Path |
| 13 | +from typing import TYPE_CHECKING |
| 14 | + |
| 15 | +if TYPE_CHECKING: |
| 16 | + from bernstein.core.seed import SeedConfig |
| 17 | + |
| 18 | +logger = logging.getLogger(__name__) |
| 19 | + |
| 20 | + |
@dataclass(frozen=True)
class PathValidationError:
    """Describe one config-referenced path that failed validation.

    Instances are immutable. ``str()`` renders the failure on a single line
    so it can be embedded directly in an error report.

    Attributes:
        field: Name of the config field the path came from
            (e.g., "context_files", "agent_catalog").
        path: The path that was rejected.
        reason: Human-readable explanation of why it was rejected.
    """

    field: str
    path: str
    reason: str

    def __str__(self) -> str:
        """Render as ``<field>: '<path>' <reason>``."""
        return "{0.field}: '{0.path}' {0.reason}".format(self)
| 37 | + |
| 38 | + |
@dataclass(frozen=True)
class PathValidationResult:
    """Immutable outcome of one config path validation pass.

    Attributes:
        errors: Every validation failure that was found; empty when all
            paths passed.
    """

    errors: tuple[PathValidationError, ...]

    @property
    def ok(self) -> bool:
        """Whether validation finished without any errors."""
        return not self.errors

    def format_errors(self) -> str:
        """Render the errors as a bulleted list, one error per line."""
        bullets = [f" - {err}" for err in self.errors]
        return "\n".join(bullets)
| 57 | + |
| 58 | + |
def validate_config_paths(seed: SeedConfig, workdir: Path) -> PathValidationResult:
    """Validate all file paths referenced in the seed configuration.

    Checks that:
    - Every entry in ``context_files`` exists and is a file
    - ``agent_catalog`` (if set) exists and is a directory

    Relative paths are resolved against ``workdir``; absolute paths are used
    as given. Only existence and kind (file vs. directory) are checked --
    read permission on the target itself is not verified. A path that cannot
    be inspected at all (for example because a parent directory is not
    searchable) is reported as a validation error instead of raising.

    Args:
        seed: Validated seed configuration from bernstein.yaml.
        workdir: Project working directory for resolving relative paths.

    Returns:
        PathValidationResult with any errors found, in config order
        (context_files first, then agent_catalog).
    """
    errors: list[PathValidationError] = []

    for ctx_path in seed.context_files:
        error = _check_config_path("context_files", ctx_path, workdir, expect_dir=False)
        if error is not None:
            errors.append(error)

    # agent_catalog is optional; only None means "not configured".
    if seed.agent_catalog is not None:
        error = _check_config_path("agent_catalog", seed.agent_catalog, workdir, expect_dir=True)
        if error is not None:
            errors.append(error)

    return PathValidationResult(errors=tuple(errors))


def _check_config_path(
    field: str,
    raw_path: str,
    workdir: Path,
    *,
    expect_dir: bool,
) -> PathValidationError | None:
    """Return the validation error for one configured path, or None if it is valid.

    Args:
        field: Config field name to record on the error.
        raw_path: The path exactly as written in the config; this (not the
            resolved path) is what gets reported back to the user.
        workdir: Base directory used to resolve a relative ``raw_path``.
        expect_dir: True if the path must be a directory, False if it must
            be a file.
    """
    resolved = Path(raw_path)
    if not resolved.is_absolute():
        resolved = workdir / resolved

    reason: str | None = None
    try:
        if not resolved.exists():
            reason = "does not exist"
        elif expect_dir:
            if not resolved.is_dir():
                reason = "is not a directory"
        elif not resolved.is_file():
            reason = "is not a file"
    except OSError as exc:
        # exists()/is_file()/is_dir() may propagate errors other than
        # "not found" (e.g. permission errors). Report them as a normal
        # validation failure so startup still ends with a clear message.
        reason = f"could not be accessed: {exc.strerror or exc}"

    if reason is None:
        return None
    return PathValidationError(field=field, path=raw_path, reason=reason)
| 122 | + |
| 123 | + |
def check_config_paths(seed: SeedConfig, workdir: Path) -> None:
    """Validate config paths and exit with clear error if any are missing.

    This is the main entry point for preflight path validation. Call this
    during startup before the orchestrator begins execution.

    Args:
        seed: Validated seed configuration from bernstein.yaml.
        workdir: Project working directory for resolving relative paths.

    Raises:
        SystemExit: If any config-referenced paths are missing or invalid.
    """
    # Function-scope import of the CLI error helpers.
    # NOTE(review): presumably kept local to avoid importing the CLI layer
    # (or an import cycle) at module import time -- confirm before hoisting.
    from bernstein.cli.errors import BernsteinError, ExitCode, handle_cli_error

    result = validate_config_paths(seed, workdir)
    if not result.ok:
        # NOTE(review): handle_cli_error is expected to terminate the process
        # itself; the ``raise`` is kept as written so control flow never
        # falls through to the success path -- confirm its return contract.
        raise handle_cli_error(
            BernsteinError(
                what="Config references invalid paths",
                why=f"The following paths in bernstein.yaml are missing or incorrect:\n{result.format_errors()}",
                fix="Create the missing files/directories or update bernstein.yaml to reference valid paths",
                exit_code=ExitCode.CONFIG,
            )
        )

    # Log success for debugging. Count agent_catalog with the same
    # ``is not None`` test that validate_config_paths uses, so the reported
    # number matches the number of paths that were actually checked.
    validated_count = len(seed.context_files) + (1 if seed.agent_catalog is not None else 0)
    if validated_count > 0:
        logger.debug("Validated %d config path(s)", validated_count)
0 commit comments