@@ -230,6 +230,10 @@ def get_param_defaults() -> Dict:
230230 # random seed for workload generation
231231 "random_seed" : 42 ,
232232
233+ ### Pipeline Params ###
234+ # maximum job time in seconds (0 = no limit)
235+ "max_job_seconds" : 0 ,
236+
233237 ### Estimator Params ###
234238 # estimator algorithm: "" (default, no estimator) or "noisy"
235239 "estimator_algo" : "" ,
@@ -315,8 +319,9 @@ def run_simulator(param_input: Union[str, Dict], workload: Workload = None) -> S
315319 handler .setFormatter (sim_formatter )
316320
317321 tick_number = 0
318- max_ticks = int (params ["duration" ] * params ["ticks_per_second" ])
319- logger .info (f"Running for { params ['duration' ]} s or { max_ticks } ticks" )
322+ max_simulation_ticks = int (params ["duration" ] * params ["ticks_per_second" ])
323+ max_job_ticks = int (params ["max_job_seconds" ] * ticks_per_second )
324+ logger .info (f"Running for { params ['duration' ]} s or { max_simulation_ticks } ticks" )
320325 logger .info (f"Running with random seed { params ['random_seed' ]} " )
321326
322327 # a pipeline may have many operators. These can get grouped
@@ -329,6 +334,13 @@ def run_simulator(param_input: Union[str, Dict], workload: Workload = None) -> S
329334 num_failures = 0
330335 failure_error_counts = defaultdict (int )
331336 executor_results = []
337+ # outstanding_pipelines tracks pipelines we still expect to complete.
338+ # Pipelines are removed when they succeed or time out. Timed-out
339+ # pipelines simply show up as not completed — we don't record their
340+ # latency. Note: the scheduler manages its own queues independently,
341+ # so it may still assign ops for a pipeline that has already been
342+ # removed from here. That's fine — the executor's _run_out_of_time_killer
343+ # will immediately kill any such containers.
332344 outstanding_pipelines : Dict [str , Pipeline ] = {}
333345 pipeline_arrivals_by_priority : Dict [Priority , int ] = {
334346 Priority .QUERY : 0 ,
@@ -344,22 +356,22 @@ def run_simulator(param_input: Union[str, Dict], workload: Workload = None) -> S
344356 memory_consumed_percent_samples : List [float ] = []
345357
346358 # IMPORTANT! This is the main simulation loop.
347- for tick_number in range (max_ticks ):
359+ for tick_number in range (max_simulation_ticks ):
348360 sim_formatter .set_simulated_elapsed_seconds (tick_number / ticks_per_second )
349361
350362 # track new work
351363 new_pipelines : List [Pipeline ] = workload .run_one_tick ()
352364 for p in new_pipelines :
353365 logger .info (f"Pipeline arrived with Priority { p .priority } and { len (p .values )} op(s)" )
354- p .runtime_status ().record_arrival (tick_number )
366+ p .runtime_status ().record_arrival (tick_number , max_job_ticks )
355367 outstanding_pipelines [p .pipeline_id ] = p
356368 pipeline_arrivals_by_priority [p .priority ] += 1
357369 for op in p .values :
358370 estimator .estimate (op )
359371
360372 # simulate scheduler/executor
361373 suspensions , assignments = scheduler .run_one_tick (executor_results , new_pipelines )
362- executor_results = executor .run_one_tick (suspensions , assignments )
374+ executor_results = executor .run_one_tick (tick_number , suspensions , assignments )
363375
364376 # track stats
365377 num_pipelines_created += len (new_pipelines )
@@ -380,6 +392,14 @@ def run_simulator(param_input: Union[str, Dict], workload: Workload = None) -> S
380392 pipeline_latencies_by_priority [pipeline .priority ].append (latency_ticks )
381393 del outstanding_pipelines [pipeline_id ]
382394
395+ # Optimization: once per simulated second, drop timed-out pipelines
396+ # so we don't keep scanning them for completion every tick.
397+ if max_job_ticks > 0 and tick_number % ticks_per_second == 0 :
398+ for pipeline_id in list (outstanding_pipelines .keys ()):
399+ pipeline = outstanding_pipelines [pipeline_id ]
400+ if pipeline .runtime_status ().has_timed_out (tick_number ):
401+ del outstanding_pipelines [pipeline_id ]
402+
383403 # log memory stats every 1 second of simulated time
384404 if tick_number % ticks_per_second == 0 :
385405 total_ram = executor .get_total_ram_gb ()
0 commit comments