@@ -17,6 +17,9 @@ const LEGACY_PROJECT_CLAUDE_MD = join(process.cwd(), ".claude", "CLAUDE.md");
1717const CLAUDECLAW_BLOCK_START = "<!-- claudeclaw:managed:start -->" ;
1818const CLAUDECLAW_BLOCK_END = "<!-- claudeclaw:managed:end -->" ;
1919
20+ // Grace period between SIGTERM and SIGKILL when a subprocess times out.
21+ const SIGKILL_GRACE_MS = 5_000 ;
22+
2023export interface RunResult {
2124 stdout : string ;
2225 stderr : string ;
@@ -45,6 +48,7 @@ function parseRateLimitResetTime(text: string): number | null {
4548 const now = new Date ( ) ;
4649 const reset = new Date ( now ) ;
4750 reset . setUTCHours ( hours , minutes , 0 , 0 ) ;
51+ // If the reset time is in the past, it means tomorrow
4852 if ( reset . getTime ( ) <= now . getTime ( ) ) {
4953 reset . setUTCDate ( reset . getUTCDate ( ) + 1 ) ;
5054 }
@@ -140,12 +144,36 @@ function buildChildEnv(baseEnv: Record<string, string>, model: string, api: stri
140144 return childEnv ;
141145}
142146
/**
 * Resolve the subprocess timeout (in ms) for a given invocation name.
 *
 * Settings are read via `getSettings()` on every call, so a changed value is
 * picked up by the next subprocess without any caching to invalidate.
 *
 * Name mapping (values are configured in minutes):
 *   "telegram"   → settings.timeouts.telegram   (default 5 min)
 *   "heartbeat"  → settings.timeouts.heartbeat  (default 5 min)
 *   anything else (jobs, bootstrap, trigger…) → settings.timeouts.job (default 30 min)
 *
 * A configured value that is missing, not a number, NaN/Infinity, zero or
 * negative is replaced by the default above. Passing such a value straight to
 * `setTimeout` would either fire almost immediately (killing the subprocess as
 * soon as it starts) or, where callers guard with a truthiness check, silently
 * disable the timeout altogether.
 *
 * @param name Invocation name used to pick the timeout category.
 * @returns A finite, positive timeout in milliseconds, capped at the largest
 *          delay a 32-bit timer can represent.
 */
function resolveTimeoutMs(name: string): number {
  // Timers store the delay as a signed 32-bit integer; larger delays are
  // treated as ~1 ms, which would kill the subprocess immediately.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;

  // NOTE(review): `timeouts` is presumably always populated by the settings
  // loader; optional chaining keeps a partially written settings file from
  // throwing here.
  const timeouts = getSettings().timeouts;

  let configuredMinutes: unknown;
  let defaultMinutes: number;
  if (name === "telegram") {
    configuredMinutes = timeouts?.telegram;
    defaultMinutes = 5;
  } else if (name === "heartbeat") {
    configuredMinutes = timeouts?.heartbeat;
    defaultMinutes = 5;
  } else {
    configuredMinutes = timeouts?.job;
    defaultMinutes = 30;
  }

  const minutes =
    typeof configuredMinutes === "number" &&
    Number.isFinite(configuredMinutes) &&
    configuredMinutes > 0
      ? configuredMinutes
      : defaultMinutes;

  return Math.min(minutes * 60_000, MAX_TIMER_DELAY_MS);
}
169+
143170async function runClaudeOnce (
144171 baseArgs : string [ ] ,
145172 model : string ,
146173 api : string ,
147- baseEnv : Record < string , string >
148- ) : Promise < { rawStdout : string ; stderr : string ; exitCode : number } > {
174+ baseEnv : Record < string , string > ,
175+ timeoutMs : number
176+ ) : Promise < { rawStdout : string ; stderr : string ; exitCode : number ; timedOut : boolean } > {
149177 const args = [ ...baseArgs ] ;
150178 const normalizedModel = model . trim ( ) . toLowerCase ( ) ;
151179 if ( model . trim ( ) && normalizedModel !== "glm" ) args . push ( "--model" , model . trim ( ) ) ;
@@ -157,17 +185,33 @@ async function runClaudeOnce(
157185 } ) ;
158186
159187 activeProc = proc ;
188+
189+ let timedOut = false ;
190+ let sigkillTimer : ReturnType < typeof setTimeout > | null = null ;
191+
192+ const killTimer = setTimeout ( ( ) => {
193+ timedOut = true ;
194+ try { proc . kill ( "SIGTERM" ) ; } catch { /* already dead */ }
195+ sigkillTimer = setTimeout ( ( ) => {
196+ try { proc . kill ( "SIGKILL" ) ; } catch { /* already dead */ }
197+ } , SIGKILL_GRACE_MS ) ;
198+ } , timeoutMs ) ;
199+
160200 const [ rawStdout , stderr ] = await Promise . all ( [
161201 new Response ( proc . stdout ) . text ( ) ,
162202 new Response ( proc . stderr ) . text ( ) ,
163203 ] ) ;
164204 await proc . exited ;
165205 if ( activeProc === proc ) activeProc = null ;
166206
207+ clearTimeout ( killTimer ) ;
208+ if ( sigkillTimer !== null ) clearTimeout ( sigkillTimer ) ;
209+
167210 return {
168211 rawStdout,
169212 stderr,
170213 exitCode : proc . exitCode ?? 1 ,
214+ timedOut,
171215 } ;
172216}
173217
@@ -208,8 +252,9 @@ async function runClaudeStreaming(
208252 api : string ,
209253 baseEnv : Record < string , string > ,
210254 onChunk ?: ( text : string ) => void ,
211- onToolEvent ?: ( line : string ) => void
212- ) : Promise < { result : string ; stderr : string ; exitCode : number ; sessionId ?: string ; isRateLimit : boolean } > {
255+ onToolEvent ?: ( line : string ) => void ,
256+ timeoutMs ?: number
257+ ) : Promise < { result : string ; stderr : string ; exitCode : number ; sessionId ?: string ; isRateLimit : boolean ; timedOut : boolean } > {
213258 const args = [ ...baseArgs ] ;
214259 const normalizedModel = model . trim ( ) . toLowerCase ( ) ;
215260 if ( model . trim ( ) && normalizedModel !== "glm" ) args . push ( "--model" , model . trim ( ) ) ;
@@ -221,6 +266,21 @@ async function runClaudeStreaming(
221266 } ) ;
222267
223268 activeProc = proc ;
269+
270+ let timedOut = false ;
271+ let sigkillTimer : ReturnType < typeof setTimeout > | null = null ;
272+ let killTimer : ReturnType < typeof setTimeout > | null = null ;
273+
274+ if ( timeoutMs ) {
275+ killTimer = setTimeout ( ( ) => {
276+ timedOut = true ;
277+ try { proc . kill ( "SIGTERM" ) ; } catch { /* already dead */ }
278+ sigkillTimer = setTimeout ( ( ) => {
279+ try { proc . kill ( "SIGKILL" ) ; } catch { /* already dead */ }
280+ } , SIGKILL_GRACE_MS ) ;
281+ } , timeoutMs ) ;
282+ }
283+
224284 const stderrPromise = new Response ( proc . stderr ) . text ( ) ;
225285
226286 let finalResult = "" ;
@@ -297,11 +357,14 @@ async function runClaudeStreaming(
297357 await proc . exited ;
298358 if ( activeProc === proc ) activeProc = null ;
299359
360+ if ( killTimer !== null ) clearTimeout ( killTimer ) ;
361+ if ( sigkillTimer !== null ) clearTimeout ( sigkillTimer ) ;
362+
300363 const stderr = await stderrPromise ;
301364 // Also check stderr for rate limit signals
302365 if ( ! isRateLimit ) isRateLimit = RATE_LIMIT_PATTERN . test ( stderr ) ;
303366
304- return { result : finalResult , stderr, exitCode : proc . exitCode ?? 1 , sessionId, isRateLimit } ;
367+ return { result : finalResult , stderr, exitCode : proc . exitCode ?? 1 , sessionId, isRateLimit, timedOut } ;
305368}
306369
307370const PROJECT_DIR = process . cwd ( ) ;
@@ -465,14 +528,17 @@ async function execClaude(name: string, prompt: string, onChunk?: (text: string)
465528 primaryConfig = { model, api } ;
466529 }
467530
531+ const timeoutMs = resolveTimeoutMs ( name ) ;
532+ const timeoutMin = timeoutMs / 60_000 ;
533+
468534 const fallbackConfig : ModelConfig = {
469535 model : fallback ?. model ?? "" ,
470536 api : fallback ?. api ?? "" ,
471537 } ;
472538 const securityArgs = buildSecurityArgs ( security ) ;
473539
474540 console . log (
475- `[${ new Date ( ) . toLocaleTimeString ( ) } ] Running: ${ name } (${ isNew ? "new session" : `resume ${ existing . sessionId . slice ( 0 , 8 ) } ` } , security: ${ security . level } )`
541+ `[${ new Date ( ) . toLocaleTimeString ( ) } ] Running: ${ name } (${ isNew ? "new session" : `resume ${ existing . sessionId . slice ( 0 , 8 ) } ` } , security: ${ security . level } , timeout: ${ timeoutMin } m )`
476542 ) ;
477543
478544 // Always use stream-json — session_id comes from the result event for both new and resumed
@@ -505,33 +571,37 @@ async function execClaude(name: string, prompt: string, onChunk?: (text: string)
505571 const { CLAUDECODE : _ , ...cleanEnv } = process . env ;
506572 const baseEnv = { ...cleanEnv } as Record < string , string > ;
507573
508- let exec = await runClaudeStreaming ( args , primaryConfig . model , primaryConfig . api , baseEnv , onChunk , onToolEvent ) ;
574+ let exec = await runClaudeStreaming ( args , primaryConfig . model , primaryConfig . api , baseEnv , onChunk , onToolEvent , timeoutMs ) ;
509575 let usedFallback = false ;
510576
511- if ( exec . isRateLimit && hasModelConfig ( fallbackConfig ) && ! sameModelConfig ( primaryConfig , fallbackConfig ) ) {
577+ if ( exec . timedOut ) {
578+ console . warn (
579+ `[${ new Date ( ) . toLocaleTimeString ( ) } ] TIMEOUT: ${ name } subprocess killed after ${ timeoutMin } m (SIGTERM+SIGKILL)`
580+ ) ;
581+ }
582+
583+ // Only retry with fallback on rate limit — not on timeout
584+ if ( ! exec . timedOut && exec . isRateLimit && hasModelConfig ( fallbackConfig ) && ! sameModelConfig ( primaryConfig , fallbackConfig ) ) {
512585 console . warn (
513586 `[${ new Date ( ) . toLocaleTimeString ( ) } ] Claude limit reached; retrying with fallback${ fallbackConfig . model ? ` (${ fallbackConfig . model } )` : "" } ...`
514587 ) ;
515- // Strip --resume to avoid mixing thinking block signatures from
516- // different providers in the same session history (see issue #18).
517588 const fallbackArgs = args . filter (
518589 ( a ) => a !== "--resume" && a !== existing ?. sessionId
519590 ) ;
520- exec = await runClaudeStreaming ( fallbackArgs , fallbackConfig . model , fallbackConfig . api , baseEnv , onChunk , onToolEvent ) ;
591+ exec = await runClaudeStreaming ( fallbackArgs , fallbackConfig . model , fallbackConfig . api , baseEnv , onChunk , onToolEvent , timeoutMs ) ;
521592 usedFallback = true ;
522593 }
523594
524595 // Auto-detect corrupted session from thinking block signature mismatch.
525- // Back up the broken session and retry with a fresh one (issue #18).
526- if ( exec . exitCode !== 0 && ! isNew && SIGNATURE_ERROR . test ( ( exec . result ?? "" ) + exec . stderr ) ) {
596+ if ( ! exec . timedOut && exec . exitCode !== 0 && ! isNew && SIGNATURE_ERROR . test ( ( exec . result ?? "" ) + exec . stderr ) ) {
527597 const backupName = await backupSession ( ) ;
528598 console . warn (
529599 `[${ new Date ( ) . toLocaleTimeString ( ) } ] Detected corrupted session (thinking block signature mismatch). Backed up to ${ backupName } , retrying with fresh session...`
530600 ) ;
531601 const freshArgs = args . filter (
532602 ( a ) => a !== "--resume" && a !== existing ?. sessionId
533603 ) ;
534- exec = await runClaudeStreaming ( freshArgs , primaryConfig . model , primaryConfig . api , baseEnv , onChunk , onToolEvent ) ;
604+ exec = await runClaudeStreaming ( freshArgs , primaryConfig . model , primaryConfig . api , baseEnv , onChunk , onToolEvent , timeoutMs ) ;
535605 }
536606
537607 const { result : stdout , stderr, exitCode, sessionId : streamedSessionId } = exec ;
@@ -541,7 +611,7 @@ async function execClaude(name: string, prompt: string, onChunk?: (text: string)
541611 if ( exec . isRateLimit ) {
542612 const combined = stdout + stderr ;
543613 const resetTime = parseRateLimitResetTime ( combined ) ;
544- rateLimitResetAt = resetTime ?? ( Date . now ( ) + 60 * 60_000 ) ; // fallback: 1 hour
614+ rateLimitResetAt = resetTime ?? ( Date . now ( ) + 60 * 60_000 ) ;
545615 rateLimitNotified = false ;
546616 console . warn (
547617 `[${ new Date ( ) . toLocaleTimeString ( ) } ] Rate limit detected. Reset at: ${ new Date ( rateLimitResetAt ) . toISOString ( ) } `
@@ -561,6 +631,7 @@ async function execClaude(name: string, prompt: string, onChunk?: (text: string)
561631 `Date: ${ new Date ( ) . toISOString ( ) } ` ,
562632 `Session: ${ sessionId } (${ isNew ? "new" : "resumed" } )` ,
563633 `Model config: ${ usedFallback ? "fallback" : "primary" } ` ,
634+ `Timeout: ${ timeoutMin } m${ exec . timedOut ? " [TIMED OUT]" : "" } ` ,
564635 ...( agentic . enabled ? [ `Task type: ${ taskType } ` , `Routing: ${ routingReasoning } ` ] : [ ] ) ,
565636 `Prompt: ${ prompt } ` ,
566637 `Exit code: ${ exitCode } ` ,
0 commit comments