@@ -103,6 +103,7 @@ def __init__(self, workspace: Union[str, Path, None] = None):
103103 tick_interval = DEFAULT_TICK_INTERVAL ,
104104 )
105105 self ._running = False
106+ self ._background_tasks : set [asyncio .Task ] = set ()
106107
107108 self .on_job_complete : List [Callable [[CronJobSpec , ExecutionResult ], Awaitable [None ]]] = []
108109 self .on_job_start : List [Callable [[CronJobSpec ], Awaitable [None ]]] = []
@@ -119,31 +120,69 @@ def manager(self) -> JobManager:
119120
async def start(self) -> None:
    """Start the cron service.

    Steps, in order: flag the service as running, call the job repo's
    ``import_declarative()``, write the PID file, then start the scheduler.

    If any step fails (or the coroutine is cancelled), the running flag is
    reset and a PID file written by this call is removed, so a failed start
    does not leave the service looking alive.

    Raises:
        Whatever the repo, PID manager or scheduler raises; re-raised as-is.
    """
    self._running = True
    pid_written = False
    try:
        self._manager.repo.import_declarative()
        self._pid_manager.write_pid()
        pid_written = True
        await self._scheduler.start()
    except BaseException:
        # Roll back so is_running() and the PID file reflect reality.
        self._running = False
        if pid_written:
            self._pid_manager.remove_pid()
        raise
124126
async def stop(self, force: bool = False, timeout: float = 30) -> None:
    """Stop the cron service.

    Clears the running flag, stops the scheduler, drains or cancels the
    in-flight job tasks tracked in ``self._background_tasks``, and finally
    removes the PID file.

    Args:
        force: If True, cancel all in-flight jobs immediately.
            If False, wait up to `timeout` seconds for them to finish,
            then cancel any that remain.
        timeout: Seconds to wait for graceful drain (ignored if force=True).
    """
    self._running = False
    try:
        self._scheduler.stop()

        # Snapshot: done-callbacks discard tasks from the live set while
        # we are waiting on them.
        in_flight = set(self._background_tasks)
        if in_flight:
            if force:
                leftover = in_flight
            else:
                _, leftover = await asyncio.wait(in_flight, timeout=timeout)
            for task in leftover:
                task.cancel()
            if leftover:
                # Let the cancellations be delivered before returning.
                await asyncio.gather(*leftover, return_exceptions=True)
    finally:
        # Always drop the PID file, even if draining was cancelled or a
        # step above raised; otherwise a stale PID outlives the service.
        self._pid_manager.remove_pid()
129153
def is_running(self) -> bool:
    """Return the service's running flag (set by start(), cleared by stop())."""
    return self._running
132156
async def run_forever(self) -> None:
    """Run the scheduler loop until interrupted.

    SIGTERM → graceful stop (wait up to 30s for in-flight jobs).
    SIGINT  → force stop (cancel all immediately). A SIGINT that arrives
              while a graceful drain is already in progress escalates it
              by cancelling the remaining in-flight jobs.

    If signal handlers cannot be installed (unsupported platform or
    non-main thread) the service still runs; cancelling this coroutine
    then triggers a forced stop so jobs and the PID file are cleaned up.
    Installed handlers are removed again before returning.
    """
    await self.start()
    stop_event = asyncio.Event()
    self._force_stop = False

    def _request_stop(force: bool) -> None:
        if force:
            self._force_stop = True
            if stop_event.is_set():
                # Already draining gracefully: escalate by cancelling
                # whatever is still in flight so stop() returns promptly.
                for task in list(self._background_tasks):
                    task.cancel()
        stop_event.set()

    loop = asyncio.get_running_loop()
    installed = []
    for sig, force in ((signal.SIGTERM, False), (signal.SIGINT, True)):
        try:
            loop.add_signal_handler(sig, _request_stop, force)
        except (NotImplementedError, RuntimeError):
            # Not available here (e.g. Windows loop, non-main thread).
            continue
        installed.append(sig)

    try:
        await stop_event.wait()
    except asyncio.CancelledError:
        # No time to drain when we are being torn down.
        self._force_stop = True
        raise
    finally:
        try:
            await self.stop(force=self._force_stop)
        finally:
            for sig in installed:
                loop.remove_signal_handler(sig)
147186
148187 # === Job CRUD (delegates to manager) ===
149188
@@ -210,9 +249,18 @@ def stop_daemon(self) -> bool:
210249 # === Scheduler Callbacks ===
211250
212251 async def _on_due_jobs (self , due : List [Tuple [CronJobSpec , CronJobState ]]) -> None :
213- """Called by scheduler when jobs are due."""
214- tasks = [self ._execute_job (job , state ) for job , state in due ]
215- await asyncio .gather (* tasks , return_exceptions = True )
252+ """Called by scheduler when jobs are due.
253+
254+ Fire-and-forget: spawn tasks but do NOT await them, so the scheduler
255+ tick loop can re-arm immediately and pick up newly due jobs.
256+ """
257+ for job , state in due :
258+ task = asyncio .create_task (
259+ self ._execute_job (job , state ),
260+ name = f'cron-{ job .id } ' ,
261+ )
262+ self ._background_tasks .add (task )
263+ task .add_done_callback (self ._background_tasks .discard )
216264
217265 async def _execute_job (self , job : CronJobSpec , state : CronJobState ) -> None :
218266 self ._manager .mark_running (job .id )
@@ -225,6 +273,13 @@ async def _execute_job(self, job: CronJobSpec, state: CronJobState) -> None:
225273
226274 config = self ._build_config (job )
227275 result = await self ._executor .execute (job , config )
276+
277+ retries_left = job .max_retries
278+ while not result .success and retries_left > 0 :
279+ retries_left -= 1
280+ config = self ._build_config (job )
281+ result = await self ._executor .execute (job , config )
282+
228283 self ._manager .record_result (job , result )
229284
230285 for cb in self .on_job_complete :
@@ -247,11 +302,13 @@ async def _execute_job(self, job: CronJobSpec, state: CronJobState) -> None:
247302 def _build_config (self , job : CronJobSpec ) -> Any :
248303 """Build DictConfig for agent execution.
249304
250- Cron jobs run non-interactively, so we force:
251- - stream=False (executor expects List[Message], not AsyncGenerator)
252- - callbacks=[] (no input_callback; stdin is unavailable)
253- - max_chat_round capped (allows tool-calling rounds, prevents runaway)
254- - output/session paths under the cron workspace
305+ Config inheritance chain (later overrides earlier):
306+ 1. Project config (from job.project via Config.from_task)
307+ 2. Job-level overrides (from job.overrides dict)
308+ 3. Cron-mandatory overrides (stream=False, no interactive callbacks)
309+
310+ The project's max_chat_round is respected; a default of 50 is used
311+ only when no project config sets it.
255312 """
256313 from omegaconf import OmegaConf
257314
@@ -268,10 +325,20 @@ def _build_config(self, job: CronJobSpec) -> Any:
268325 config = OmegaConf .merge (config , OmegaConf .create (job .overrides ))
269326
270327 OmegaConf .update (config , 'generation_config.stream' , False , merge = True )
271- OmegaConf .update (config , 'callbacks' , [], merge = False )
272- current_rounds = getattr (config , 'max_chat_round' , None )
273- if current_rounds is None or current_rounds > 10 :
274- OmegaConf .update (config , 'max_chat_round' , 10 , merge = True )
328+
329+ existing_cbs = getattr (config , 'callbacks' , None )
330+ if existing_cbs :
331+ safe_cbs = [
332+ cb for cb in existing_cbs
333+ if cb != 'input_callback' and not str (cb ).endswith ('input_callback' )
334+ ]
335+ OmegaConf .update (config , 'callbacks' , safe_cbs , merge = False )
336+ else :
337+ OmegaConf .update (config , 'callbacks' , [], merge = False )
338+
339+ if getattr (config , 'max_chat_round' , None ) is None :
340+ OmegaConf .update (config , 'max_chat_round' , 50 , merge = True )
341+
275342 OmegaConf .update (
276343 config , 'session_log.dir' ,
277344 str (self ._workspace / 'sessions' ), merge = True
@@ -281,6 +348,11 @@ def _build_config(self, job: CronJobSpec) -> Any:
281348 str (self ._workspace / 'output' / job .id ), merge = True
282349 )
283350
351+ if job .session_mode == 'persistent' :
352+ OmegaConf .update (config , 'load_cache' , True , merge = True )
353+ OmegaConf .update (config , 'save_history' , True , merge = True )
354+ OmegaConf .update (config , 'tag' , f'cron-{ job .id } ' , merge = True )
355+
284356 return config
285357
286358 # === Manual Tick ===
@@ -305,6 +377,13 @@ async def run_job_now(self, job_id: str) -> Optional[ExecutionResult]:
305377
306378 config = self ._build_config (job )
307379 result = await self ._executor .execute (job , config )
380+
381+ retries_left = job .max_retries
382+ while not result .success and retries_left > 0 :
383+ retries_left -= 1
384+ config = self ._build_config (job )
385+ result = await self ._executor .execute (job , config )
386+
308387 self ._manager .record_result (job , result )
309388
310389 for cb in self .on_job_complete :
0 commit comments