Skip to content

Commit 08d4375

Browse files
committed
Add MCP spec server and preflight validation
1 parent 9569d9a commit 08d4375

11 files changed

Lines changed: 891 additions & 7 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"jinja2>=3.1.0",
1414
"marshmallow>=3.20.0",
1515
"marshmallow-dataclass>=8.6.0",
16+
"mcp>=1.27.0",
1617
"pydantic>=2.5.0",
1718
"requests>=2.31.0",
1819
"rich>=13.0.0",
@@ -25,6 +26,7 @@ srtctl = "srtctl.cli.submit:main"
2526
srtctl-i = "srtctl.cli.interactive:main"
2627
srtctl-sweep = "srtctl.cli.do_sweep:main"
2728
srtctl-setup-head = "srtctl.cli.setup_head:main"
29+
srtctl-mcp = "srtctl.mcp.server:main"
2830

2931
[tool.hatch.build.targets.wheel]
3032
packages = ["src/srtctl"]

src/srtctl/cli/submit.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from srtctl.core.lockfile import load_lockfile_fingerprints
5353
from srtctl.core.schema import SrtConfig
5454
from srtctl.core.status import create_job_record
55+
from srtctl.core.validation import preflight_config_variants
5556

5657
console = Console()
5758
logger = logging.getLogger(__name__)
@@ -65,6 +66,24 @@ def _record_submission(data: dict) -> None:
6566
_submissions.append(data)
6667

6768

69+
def _format_preflight_error(label: str, results: list[Any]) -> str:
70+
lines = [f"Preflight failed for {label}:"]
71+
for result in results:
72+
for issue in result.errors:
73+
lines.append(f"- {issue.field}: {issue.message}")
74+
return "\n".join(lines)
75+
76+
77+
def _assert_preflight_passed(raw_config: dict[str, Any], *, label: str) -> None:
    """Run preflight on every variant of ``raw_config`` and fail loudly on errors.

    Args:
        raw_config: Parsed (unresolved) config mapping to check.
        label: Name used in the error message to identify the config.

    Raises:
        ValueError: If at least one variant's preflight result is not ``ok``;
            the message lists every issue of the failing variants.
    """
    cluster = load_cluster_config()
    outcomes = preflight_config_variants(raw_config, cluster_config=cluster)
    rejected = [outcome for outcome in outcomes if not outcome.ok]
    if not rejected:
        return
    raise ValueError(_format_preflight_error(label, rejected))
85+
86+
6887
def _install_mock_submit_patches() -> list:
6988
"""Stub the subset of `submit_with_orchestrator` that reaches real infra.
7089
@@ -677,6 +696,16 @@ def submit_single(
677696
if config is None:
678697
raise ValueError("Either config_path or config must be provided")
679698

699+
if runtime_config_text is not None:
700+
raw_config = yaml.safe_load(runtime_config_text)
701+
elif config_path is not None:
702+
with open(config_path) as f:
703+
raw_config = yaml.safe_load(f)
704+
else:
705+
raw_config = config.model_dump(mode="json")
706+
707+
_assert_preflight_passed(raw_config, label=str(config_path or "<inline-config>"))
708+
680709
# Always use orchestrator mode
681710
return submit_with_orchestrator(
682711
config_path=config_path or Path("./config.yaml"),
@@ -1091,6 +1120,7 @@ def main():
10911120
srtctl apply -f config.yaml # Submit job
10921121
srtctl apply -f ./configs/ # Submit all YAMLs in directory
10931122
srtctl apply -f config.yaml --sweep # Submit sweep
1123+
srtctl preflight -f config.yaml # Check model/container availability
10941124
srtctl dry-run -f config.yaml # Dry run
10951125
srtctl resolve-override -f config.yaml # Resolve override YAML (no submit)
10961126
srtctl resolve-override -f config.yaml --stdout # Print to stdout
@@ -1144,6 +1174,19 @@ def add_common_args(p):
11441174
dry_run_parser = subparsers.add_parser("dry-run", help="Validate without submitting")
11451175
add_common_args(dry_run_parser)
11461176

1177+
preflight_parser = subparsers.add_parser(
1178+
"preflight",
1179+
help="Check model and container availability without submitting",
1180+
)
1181+
preflight_parser.add_argument(
1182+
"-f",
1183+
"--file",
1184+
type=str,
1185+
required=True,
1186+
dest="config",
1187+
help="YAML config file, or file:selector for overrides",
1188+
)
1189+
11471190
resolve_parser = subparsers.add_parser(
11481191
"resolve-override",
11491192
help="Resolve override YAML into specialised files without submitting",
@@ -1266,6 +1309,30 @@ def add_common_args(p):
12661309
resolve_override_cmd(config_path, selector=selector, stdout=getattr(args, "stdout", False))
12671310
return
12681311

1312+
if args.command == "preflight":
1313+
if config_path.is_dir():
1314+
raise ValueError("preflight currently expects a file, not a directory")
1315+
with open(config_path) as f:
1316+
raw_config = yaml.safe_load(f)
1317+
results = preflight_config_variants(
1318+
raw_config,
1319+
cluster_config=load_cluster_config(),
1320+
selector=selector,
1321+
)
1322+
for result in results:
1323+
icon = "[green]✓[/]" if result.ok else "[red]✗[/]"
1324+
console.print(f"{icon} {result.variant}")
1325+
console.print(f" model.path: {result.model.message}")
1326+
console.print(f" model.container: {result.container.message}")
1327+
if any(not result.ok for result in results):
1328+
raise ValueError(
1329+
_format_preflight_error(
1330+
str(config_path),
1331+
[result for result in results if not result.ok],
1332+
)
1333+
)
1334+
return
1335+
12691336
setup_script = getattr(args, "setup_script", None)
12701337
output_dir = getattr(args, "output_dir", None)
12711338

src/srtctl/core/validation.py

Lines changed: 230 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,20 @@
1212
from __future__ import annotations
1313

1414
import logging
15+
import os
1516
import threading
1617
from dataclasses import dataclass
1718
from pathlib import Path
18-
from typing import TYPE_CHECKING
19+
from typing import TYPE_CHECKING, Any
1920

2021
import requests
2122

23+
from srtctl.core.config import (
24+
generate_override_configs,
25+
load_cluster_config,
26+
resolve_config_with_defaults,
27+
)
28+
2229
if TYPE_CHECKING:
2330
from srtctl.core.schema import SrtConfig
2431

@@ -36,6 +43,228 @@ class ValidationResult:
3643
message: str
3744

3845

46+
@dataclass(frozen=True)
class PreflightIssue:
    """A single problem found during preflight, immutable once created."""

    # Machine-readable identifier, e.g. "model-missing".
    code: str
    # Dotted config key the issue refers to, e.g. "model.path".
    field: str
    # Human-readable explanation shown to the user.
    message: str
51+
52+
53+
@dataclass(frozen=True)
class PreflightResolution:
    """Outcome of resolving and checking one config field, immutable once created."""

    # Dotted config key that was checked, e.g. "model.container".
    field: str
    # Value as written in the user's config (None when absent).
    raw: str | None
    # Value after resolution (None when nothing could be resolved).
    resolved: str | None
    # Where the value came from: a cluster alias table or "literal".
    source: str
    # True when the resolved value passed its availability check.
    ok: bool
    # Human-readable detail describing the check outcome.
    message: str
61+
62+
63+
@dataclass(frozen=True)
class PreflightResult:
    """Combined preflight outcome for one config variant."""

    # Name of the variant this result belongs to.
    variant: str
    # True when no issues were found for the variant.
    ok: bool
    # Outcome of the ``model.path`` check.
    model: PreflightResolution
    # Outcome of the ``model.container`` check.
    container: PreflightResolution
    # Every issue found for this variant (empty when ``ok``).
    errors: list[PreflightIssue]

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this result.

        The nested entries are copies of the underlying attribute dicts, so
        callers may freely mutate the returned payload without altering the
        frozen ``model``/``container``/issue instances it was built from.
        """
        return {
            "variant": self.variant,
            "ok": self.ok,
            # dict(...) copies: handing out the live __dict__ would let a
            # caller bypass frozen=True by editing the returned mapping.
            "model": dict(vars(self.model)),
            "container": dict(vars(self.container)),
            "errors": [dict(vars(issue)) for issue in self.errors],
        }
79+
80+
81+
def _expand_path(value: str) -> str:
82+
return os.path.expanduser(os.path.expandvars(value))
83+
84+
85+
def _check_path(path_str: str, *, expect: str) -> tuple[bool, str]:
86+
path = Path(path_str).resolve()
87+
if not path.exists():
88+
return False, f"not found: {path}"
89+
if expect == "dir" and not path.is_dir():
90+
return False, f"not a directory: {path}"
91+
if expect == "file" and not path.is_file():
92+
return False, f"not a file: {path}"
93+
return True, f"exists: {path}"
94+
95+
96+
def _preflight_model(
    raw_config: dict[str, Any],
    resolved_config: dict[str, Any],
    cluster_config: dict[str, Any] | None,
) -> tuple[PreflightResolution, list[PreflightIssue]]:
    """Check that ``model.path`` points at an existing local directory.

    Args:
        raw_config: Variant config as written; its ``model.path`` is reported
            back as ``raw`` and used to detect alias usage.
        resolved_config: The same variant after resolution; its ``model.path``
            is the value that is expanded and checked on disk.
        cluster_config: Cluster settings; its ``model_paths`` table decides
            whether the raw value counts as an alias. ``None`` means no aliases.

    Returns:
        A ``(resolution, issues)`` pair. ``issues`` is empty on success and
        holds exactly one ``PreflightIssue`` otherwise.
    """
    # A bare ``model:`` key loads from YAML as None (a scalar is possible too).
    # ``.get("model", {})`` only defaults when the key is absent, so treat any
    # non-mapping section as "no path given" and report it as a normal issue
    # instead of crashing with AttributeError.
    raw_section = raw_config.get("model")
    resolved_section = resolved_config.get("model")
    raw = raw_section.get("path") if isinstance(raw_section, dict) else None
    resolved = resolved_section.get("path") if isinstance(resolved_section, dict) else None
    aliases = (cluster_config or {}).get("model_paths") or {}
    source = "srtslurm.yaml:model_paths" if raw in aliases else "literal"

    def _resolution(resolved_value: str | None, ok: bool, message: str) -> PreflightResolution:
        # All outcomes share field/raw/source; only these three parts vary.
        return PreflightResolution(
            field="model.path",
            raw=raw,
            resolved=resolved_value,
            source=source,
            ok=ok,
            message=message,
        )

    if not raw or not resolved:
        issue = PreflightIssue(
            code="model-missing",
            field="model.path",
            message="model.path is required",
        )
        return _resolution(resolved, False, issue.message), [issue]

    # Expand once and reuse for both the check and the reported path.
    expanded = _expand_path(resolved)
    ok, detail = _check_path(expanded, expect="dir")
    if ok:
        return _resolution(str(Path(expanded).resolve()), True, detail), []

    if source == "srtslurm.yaml:model_paths":
        message = (
            f"Model alias '{raw}' resolved to '{resolved}', but that path is unavailable. "
            "Pull or register the model yourself before submitting."
        )
    else:
        message = (
            f"Model '{raw}' is not a local model path and is not defined in srtslurm.yaml "
            "model_paths. Pull or register the model yourself before submitting."
        )
    issue = PreflightIssue(
        code="model-not-available",
        field="model.path",
        message=message,
    )
    return _resolution(resolved, False, message), [issue]
164+
165+
166+
def _preflight_container(
    raw_config: dict[str, Any],
    resolved_config: dict[str, Any],
    cluster_config: dict[str, Any] | None,
) -> tuple[PreflightResolution, list[PreflightIssue]]:
    """Check that ``model.container`` points at an existing local file.

    Args:
        raw_config: Variant config as written; its ``model.container`` is
            reported back as ``raw`` and used to detect alias usage.
        resolved_config: The same variant after resolution; its
            ``model.container`` is the value that is expanded and checked on disk.
        cluster_config: Cluster settings; its ``containers`` table decides
            whether the raw value counts as an alias. ``None`` means no aliases.

    Returns:
        A ``(resolution, issues)`` pair. ``issues`` is empty on success and
        holds exactly one ``PreflightIssue`` otherwise.
    """
    # A bare ``model:`` key loads from YAML as None (a scalar is possible too).
    # ``.get("model", {})`` only defaults when the key is absent, so treat any
    # non-mapping section as "no container given" and report it as a normal
    # issue instead of crashing with AttributeError.
    raw_section = raw_config.get("model")
    resolved_section = resolved_config.get("model")
    raw = raw_section.get("container") if isinstance(raw_section, dict) else None
    resolved = resolved_section.get("container") if isinstance(resolved_section, dict) else None
    aliases = (cluster_config or {}).get("containers") or {}
    source = "srtslurm.yaml:containers" if raw in aliases else "literal"

    def _resolution(resolved_value: str | None, ok: bool, message: str) -> PreflightResolution:
        # All outcomes share field/raw/source; only these three parts vary.
        return PreflightResolution(
            field="model.container",
            raw=raw,
            resolved=resolved_value,
            source=source,
            ok=ok,
            message=message,
        )

    if not raw or not resolved:
        issue = PreflightIssue(
            code="container-missing",
            field="model.container",
            message="model.container is required",
        )
        return _resolution(resolved, False, issue.message), [issue]

    # Expand once and reuse for both the check and the reported path.
    expanded = _expand_path(resolved)
    ok, detail = _check_path(expanded, expect="file")
    if ok:
        return _resolution(str(Path(expanded).resolve()), True, detail), []

    if source == "srtslurm.yaml:containers":
        message = (
            f"Container alias '{raw}' resolved to '{resolved}', but that file is unavailable. "
            "Provide or register the container yourself before submitting."
        )
    else:
        message = (
            f"Container '{raw}' is not a local container path and is not defined in "
            "srtslurm.yaml containers. Provide or register the container yourself before submitting."
        )
    issue = PreflightIssue(
        code="container-not-available",
        field="model.container",
        message=message,
    )
    return _resolution(resolved, False, message), [issue]
234+
235+
236+
def preflight_config_variants(
    raw_config: dict[str, Any],
    *,
    cluster_config: dict[str, Any] | None = None,
    selector: str | None = None,
) -> list[PreflightResult]:
    """Run model and container preflight checks for every variant of a config.

    Args:
        raw_config: Parsed config mapping. When it has a top-level ``base``
            key it is expanded through ``generate_override_configs``;
            otherwise it is checked as a single variant named ``"base"``.
        cluster_config: Cluster settings to resolve against. When ``None``
            they are loaded via ``load_cluster_config()``.
        selector: Passed to ``generate_override_configs``; ignored when the
            config has no ``base`` key.

    Returns:
        One ``PreflightResult`` per variant, in generation order.

    Raises:
        TypeError: If ``raw_config`` is not a mapping (for example ``None``
            from an empty YAML file, or a top-level list/scalar).
    """
    # An empty YAML document loads as None; fail up front with an actionable
    # message rather than an opaque "argument of type 'NoneType' is not
    # iterable" (or a later crash for lists/scalars).
    if not isinstance(raw_config, dict):
        raise TypeError(
            "preflight expects the config to be a mapping at the top level, "
            f"got {type(raw_config).__name__}"
        )

    active_cluster_config = load_cluster_config() if cluster_config is None else cluster_config
    if "base" in raw_config:
        variants = generate_override_configs(raw_config, selector=selector)
    else:
        variants = [("base", raw_config)]

    results: list[PreflightResult] = []
    for suffix, variant in variants:
        resolved = resolve_config_with_defaults(variant, active_cluster_config)
        model, model_issues = _preflight_model(variant, resolved, active_cluster_config)
        container, container_issues = _preflight_container(
            variant, resolved, active_cluster_config
        )
        issues = [*model_issues, *container_issues]
        results.append(
            PreflightResult(
                variant=suffix,
                ok=not issues,
                model=model,
                container=container,
                errors=issues,
            )
        )
    return results
266+
267+
39268
def validate_local_path(name: str, path: str) -> ValidationResult:
40269
"""Check that a local file or directory exists."""
41270
try:

src/srtctl/mcp/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""MCP surface for srtctl."""

0 commit comments

Comments
 (0)