@@ -151,6 +151,7 @@ def __init__(self, batch_size=5):
151151
152152 self .submitted_batches = set () # Track which batch numbers have been submitted
153153 self .job_mapping = {} # Map test_id -> SLURM job_id
154+ self .submit_errors = {} # Map test_id -> error message (validation/submission failures)
154155 self .all_configs = [] # Ordered list of all test configs
155156
156157 logger .info (f"\n { '=' * 70 } " )
@@ -214,6 +215,8 @@ def _submit_batch(self, batch_num):
214215 batch_num: Batch number to submit (0-indexed)
215216 """
216217 from execution .executor import JobManager
218+ from utils .config_validator import ConfigValidator
219+ from utils .job_tracker import JobTracker
217220
218221 # Calculate batch range
219222 if self .batch_size :
@@ -230,33 +233,56 @@ def _submit_batch(self, batch_num):
230233 logger .info (f"Range: [{ start_idx } :{ end_idx } ] ({ len (batch_configs )} jobs)" )
231234 logger .info (f"{ '=' * 70 } \n " )
232235
233- # Submit all jobs in this batch
236+ # Pre-validate all configs before submission
237+ logger .info ("Pre-validating configurations..." )
238+ valid_configs = []
239+ for config in batch_configs :
240+ try :
241+ ConfigValidator .validate_test_config (config )
242+ valid_configs .append (config )
243+ except Exception as e :
244+ # Validation failed - mark as None and record error
245+ self .job_mapping [config .test_id ] = None
246+ self .submit_errors [config .test_id ] = f"Validation failed: { str (e )} "
247+ logger .error (f" [FAILED] Validation failed: { config .test_id } " )
248+ logger .error (f" Error: { str (e )[:100 ]} " )
249+
250+ logger .info (
251+ f"Validation complete: { len (valid_configs )} /{ len (batch_configs )} configs valid\n "
252+ )
253+
254+ # Submit only valid configs
234255 success_count = 0
235- for i , config in enumerate (batch_configs , 1 ):
256+ for i , config in enumerate (valid_configs , 1 ):
236257 try :
237258 success , job_id = JobManager .submit_test_job (config )
238259 if success and job_id :
239260 self .job_mapping [config .test_id ] = job_id
261+ JobTracker .record_job (job_id ) # Record job ID for cleanup
240262 success_count += 1
241- # Truncate test_id for display
242- display_id = (
243- config .test_id [:60 ] + "..." if len (config .test_id ) > 60 else config .test_id
263+ logger .success (
264+ f" [{ i :3d} /{ len (valid_configs )} ] Job { job_id } <- { config .test_id } "
244265 )
245- logger .success (f" [{ i :3d} /{ len (batch_configs )} ] Job { job_id } <- { display_id } " )
246266 else :
267+ # Submission failed - mark as None and record error
247268 self .job_mapping [config .test_id ] = None
248- logger .error (f" [{ i :3d} /{ len (batch_configs )} ] Failed: { config .test_id [:50 ]} " )
269+ self .submit_errors [config .test_id ] = f"Job submission failed: { job_id } "
270+ logger .error (f" [{ i :3d} /{ len (valid_configs )} ] Failed: { config .test_id } " )
249271 except Exception as e :
272+ # Submission exception - mark as None and record error
250273 self .job_mapping [config .test_id ] = None
251- logger .error (f" [{ i :3d} /{ len (batch_configs )} ] Error: { e } " )
274+ self .submit_errors [config .test_id ] = f"Submission exception: { str (e )} "
275+ logger .error (f" [{ i :3d} /{ len (valid_configs )} ] Error: { e } " )
252276
253277 # Mark batch as submitted
254278 self .submitted_batches .add (batch_num )
255279
256280 logger .info (f"\n { '=' * 70 } " )
257281 logger .success (
258- f"Batch { batch_num } Complete: { success_count } /{ len (batch_configs )} succeeded "
282+ f"Batch { batch_num } Complete: { success_count } /{ len (valid_configs )} submitted successfully "
259283 )
284+ if len (valid_configs ) < len (batch_configs ):
285+ logger .warning (f"Skipped { len (batch_configs ) - len (valid_configs )} invalid config(s)" )
260286 logger .info (f"{ '=' * 70 } \n " )
261287
262288
0 commit comments