1212from __future__ import annotations
1313
1414import logging
15+ import os
1516import threading
1617from dataclasses import dataclass
1718from pathlib import Path
18- from typing import TYPE_CHECKING
19+ from typing import TYPE_CHECKING , Any
1920
2021import requests
2122
23+ from srtctl .core .config import (
24+ generate_override_configs ,
25+ load_cluster_config ,
26+ resolve_config_with_defaults ,
27+ )
28+
2229if TYPE_CHECKING :
2330 from srtctl .core .schema import SrtConfig
2431
@@ -36,6 +43,228 @@ class ValidationResult:
3643 message : str
3744
3845
@dataclass(frozen=True)
class PreflightIssue:
    """A single blocking problem found while preflighting a config.

    Instances are immutable. The preflight helpers in this module create one
    issue per failed check and collect them on ``PreflightResult.errors``.
    """

    # Short machine-readable identifier, e.g. "model-missing" or
    # "container-not-available".
    code: str
    # Dotted config key the issue refers to, e.g. "model.path".
    field: str
    # Human-readable explanation of what is wrong.
    message: str
51+
52+
@dataclass(frozen=True)
class PreflightResolution:
    """Outcome of resolving and checking one config field during preflight.

    Instances are immutable. One resolution is produced for ``model.path`` and
    one for ``model.container`` per preflighted variant.
    """

    # Dotted config key that was checked, e.g. "model.path".
    field: str
    # Value exactly as it appears in the un-resolved config (None if absent).
    raw: str | None
    # Value after config resolution; on success the preflight helpers store
    # the expanded, absolute path here (None if absent).
    resolved: str | None
    # Where the value came from: "literal", or the srtslurm.yaml alias table
    # name (e.g. "srtslurm.yaml:model_paths") when ``raw`` is an alias key.
    source: str
    # True when the resolved value passed its filesystem check.
    ok: bool
    # Human-readable detail describing the success or the failure.
    message: str
61+
62+
@dataclass(frozen=True)
class PreflightResult:
    """Aggregated preflight outcome for one config variant."""

    # Variant label ("base" for a config without override variants).
    variant: str
    # True when no issues were recorded for this variant.
    ok: bool
    # Resolution details for ``model.path``.
    model: PreflightResolution
    # Resolution details for ``model.container``.
    container: PreflightResolution
    # Every blocking issue found for this variant.
    errors: list[PreflightIssue]

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict snapshot of this result.

        The nested resolutions and issues are converted with
        ``dataclasses.asdict`` so the returned payload is an independent copy:
        mutating it cannot alter this frozen result, and the ``model`` and
        ``container`` entries never alias each other.

        Returns:
            A dict with keys ``variant``, ``ok``, ``model``, ``container`` and
            ``errors``, where the nested dataclasses are expanded to dicts.
        """
        # Local import: the module header only imports ``dataclass``.
        from dataclasses import asdict

        return {
            "variant": self.variant,
            "ok": self.ok,
            "model": asdict(self.model),
            "container": asdict(self.container),
            "errors": [asdict(issue) for issue in self.errors],
        }
79+
80+
81+ def _expand_path (value : str ) -> str :
82+ return os .path .expanduser (os .path .expandvars (value ))
83+
84+
85+ def _check_path (path_str : str , * , expect : str ) -> tuple [bool , str ]:
86+ path = Path (path_str ).resolve ()
87+ if not path .exists ():
88+ return False , f"not found: { path } "
89+ if expect == "dir" and not path .is_dir ():
90+ return False , f"not a directory: { path } "
91+ if expect == "file" and not path .is_file ():
92+ return False , f"not a file: { path } "
93+ return True , f"exists: { path } "
94+
95+
def _preflight_model(
    raw_config: dict[str, Any],
    resolved_config: dict[str, Any],
    cluster_config: dict[str, Any] | None,
) -> tuple[PreflightResolution, list[PreflightIssue]]:
    """Check that ``model.path`` is set and resolves to an existing directory.

    Args:
        raw_config: Config as written by the user; ``model.path`` is read
            from it to report the original value and to detect aliases.
        resolved_config: The same config after resolution; its ``model.path``
            is the value that is checked on disk.
        cluster_config: Cluster settings, or None. Its ``model_paths`` mapping
            is consulted only to tell whether the raw value is an alias key.

    Returns:
        ``(resolution, issues)``. *issues* is empty on success; otherwise it
        holds one ``model-missing`` or ``model-not-available`` issue whose
        message matches ``resolution.message``.
    """
    field_name = "model.path"
    alias_source = "srtslurm.yaml:model_paths"

    # A ``model:`` key that is present but null (or not a mapping) must be
    # reported as a missing path, not crash preflight with AttributeError.
    raw_section = raw_config.get("model")
    resolved_section = resolved_config.get("model")
    raw = raw_section.get("path") if isinstance(raw_section, dict) else None
    resolved = resolved_section.get("path") if isinstance(resolved_section, dict) else None

    aliases = (cluster_config or {}).get("model_paths") or {}
    source = alias_source if raw in aliases else "literal"

    def report(
        *,
        ok: bool,
        resolved_value: str | None,
        message: str,
        code: str | None = None,
    ) -> tuple[PreflightResolution, list[PreflightIssue]]:
        # Build the resolution plus (when ``code`` is given) its single issue.
        resolution = PreflightResolution(
            field=field_name,
            raw=raw,
            resolved=resolved_value,
            source=source,
            ok=ok,
            message=message,
        )
        if code is None:
            return resolution, []
        return resolution, [PreflightIssue(code=code, field=field_name, message=message)]

    if not raw or not resolved:
        return report(
            ok=False,
            resolved_value=resolved,
            message="model.path is required",
            code="model-missing",
        )

    # Expand once and reuse for both the check and the reported path.
    expanded = _expand_path(resolved)
    ok, detail = _check_path(expanded, expect="dir")
    if ok:
        return report(
            ok=True,
            resolved_value=str(Path(expanded).resolve()),
            message=detail,
        )

    if source == alias_source:
        message = (
            f"Model alias '{raw}' resolved to '{resolved}', but that path is unavailable. "
            "Pull or register the model yourself before submitting."
        )
    else:
        message = (
            f"Model '{raw}' is not a local model path and is not defined in srtslurm.yaml "
            "model_paths. Pull or register the model yourself before submitting."
        )
    # On failure the un-expanded resolved value is reported, as configured.
    return report(
        ok=False,
        resolved_value=resolved,
        message=message,
        code="model-not-available",
    )
164+
165+
def _preflight_container(
    raw_config: dict[str, Any],
    resolved_config: dict[str, Any],
    cluster_config: dict[str, Any] | None,
) -> tuple[PreflightResolution, list[PreflightIssue]]:
    """Check that ``model.container`` is set and resolves to an existing file.

    Args:
        raw_config: Config as written by the user; ``model.container`` is
            read from it to report the original value and to detect aliases.
        resolved_config: The same config after resolution; its
            ``model.container`` is the value that is checked on disk.
        cluster_config: Cluster settings, or None. Its ``containers`` mapping
            is consulted only to tell whether the raw value is an alias key.

    Returns:
        ``(resolution, issues)``. *issues* is empty on success; otherwise it
        holds one ``container-missing`` or ``container-not-available`` issue
        whose message matches ``resolution.message``.
    """
    field_name = "model.container"
    alias_source = "srtslurm.yaml:containers"

    # A ``model:`` key that is present but null (or not a mapping) must be
    # reported as a missing container, not crash preflight with AttributeError.
    raw_section = raw_config.get("model")
    resolved_section = resolved_config.get("model")
    raw = raw_section.get("container") if isinstance(raw_section, dict) else None
    resolved = (
        resolved_section.get("container") if isinstance(resolved_section, dict) else None
    )

    aliases = (cluster_config or {}).get("containers") or {}
    source = alias_source if raw in aliases else "literal"

    def report(
        *,
        ok: bool,
        resolved_value: str | None,
        message: str,
        code: str | None = None,
    ) -> tuple[PreflightResolution, list[PreflightIssue]]:
        # Build the resolution plus (when ``code`` is given) its single issue.
        resolution = PreflightResolution(
            field=field_name,
            raw=raw,
            resolved=resolved_value,
            source=source,
            ok=ok,
            message=message,
        )
        if code is None:
            return resolution, []
        return resolution, [PreflightIssue(code=code, field=field_name, message=message)]

    if not raw or not resolved:
        return report(
            ok=False,
            resolved_value=resolved,
            message="model.container is required",
            code="container-missing",
        )

    # Expand once and reuse for both the check and the reported path.
    expanded = _expand_path(resolved)
    ok, detail = _check_path(expanded, expect="file")
    if ok:
        return report(
            ok=True,
            resolved_value=str(Path(expanded).resolve()),
            message=detail,
        )

    if source == alias_source:
        message = (
            f"Container alias '{raw}' resolved to '{resolved}', but that file is unavailable. "
            "Provide or register the container yourself before submitting."
        )
    else:
        message = (
            f"Container '{raw}' is not a local container path and is not defined in "
            "srtslurm.yaml containers. Provide or register the container yourself before submitting."
        )
    # On failure the un-expanded resolved value is reported, as configured.
    return report(
        ok=False,
        resolved_value=resolved,
        message=message,
        code="container-not-available",
    )
234+
235+
def preflight_config_variants(
    raw_config: dict[str, Any],
    *,
    cluster_config: dict[str, Any] | None = None,
    selector: str | None = None,
) -> list[PreflightResult]:
    """Run the model and container preflight checks for every config variant.

    Args:
        raw_config: Un-resolved config. When it has a top-level ``"base"``
            key, variants come from ``generate_override_configs``; otherwise
            the config itself is the single variant, labelled ``"base"``.
        cluster_config: Cluster settings to use. When None, they are loaded
            with ``load_cluster_config()``.
        selector: Passed through to ``generate_override_configs``; unused
            when the config has no ``"base"`` key.

    Returns:
        One ``PreflightResult`` per variant, in the order the variants were
        produced. A result is ``ok`` only when neither check reported issues.
    """
    if cluster_config is None:
        active_cluster_config = load_cluster_config()
    else:
        active_cluster_config = cluster_config

    if "base" in raw_config:
        labelled_variants = generate_override_configs(raw_config, selector=selector)
    else:
        labelled_variants = [("base", raw_config)]

    def check_variant(label: str, variant_config: dict[str, Any]) -> PreflightResult:
        # Resolve first, then check the model and the container in that order.
        resolved_config = resolve_config_with_defaults(variant_config, active_cluster_config)
        model_resolution, model_issues = _preflight_model(
            variant_config, resolved_config, active_cluster_config
        )
        container_resolution, container_issues = _preflight_container(
            variant_config, resolved_config, active_cluster_config
        )
        all_issues = list(model_issues)
        all_issues.extend(container_issues)
        return PreflightResult(
            variant=label,
            ok=not all_issues,
            model=model_resolution,
            container=container_resolution,
            errors=all_issues,
        )

    return [check_variant(label, variant_config) for label, variant_config in labelled_variants]
266+
267+
39268def validate_local_path (name : str , path : str ) -> ValidationResult :
40269 """Check that a local file or directory exists."""
41270 try :
0 commit comments