88import Foundation
99import Combine
1010
11+ private final class ToolBodyTimeoutRaceState : @unchecked Sendable {
12+ private let lock = NSLock ( )
13+ private var continuation : CheckedContinuation < String , Never > ?
14+ private var bodyTask : Task < Void , Never > ?
15+ private var timeoutTask : Task < Void , Never > ?
16+ private var cancelBodyWhenSet = false
17+ private var cancelTimeoutWhenSet = false
18+
19+ init ( continuation: CheckedContinuation < String , Never > ) {
20+ self . continuation = continuation
21+ }
22+
23+ func setTasks( body: Task < Void , Never > , timeout: Task < Void , Never > ) {
24+ lock. lock ( )
25+ bodyTask = body
26+ timeoutTask = timeout
27+ let shouldCancelBody = cancelBodyWhenSet
28+ let shouldCancelTimeout = cancelTimeoutWhenSet
29+ lock. unlock ( )
30+
31+ if shouldCancelBody { body. cancel ( ) }
32+ if shouldCancelTimeout { timeout. cancel ( ) }
33+ }
34+
35+ func finish( with result: String , cancelBody: Bool , cancelTimeout: Bool ) {
36+ lock. lock ( )
37+ guard let continuation else {
38+ lock. unlock ( )
39+ return
40+ }
41+ self . continuation = nil
42+
43+ let bodyToCancel = cancelBody ? bodyTask : nil
44+ let timeoutToCancel = cancelTimeout ? timeoutTask : nil
45+ if cancelBody, bodyTask == nil {
46+ cancelBodyWhenSet = true
47+ }
48+ if cancelTimeout, timeoutTask == nil {
49+ cancelTimeoutWhenSet = true
50+ }
51+ lock. unlock ( )
52+
53+ bodyToCancel? . cancel ( )
54+ timeoutToCancel? . cancel ( )
55+ continuation. resume ( returning: result)
56+ }
57+ }
58+
1159@MainActor
1260final class ToolRegistry : ObservableObject {
1361 static let shared = ToolRegistry ( )
@@ -334,7 +382,7 @@ final class ToolRegistry: ObservableObject {
334382 /// fall through unchanged: parsing is best-effort, and tool bodies
335383 /// keep their richer `requireXxx` helpers as the second line of
336384 /// defence.
337- private nonisolated static func preflight(
385+ nonisolated private static func preflight(
338386 argumentsJSON: String ,
339387 schema: JSONValue ? ,
340388 toolName: String
@@ -388,14 +436,13 @@ final class ToolRegistry: ObservableObject {
388436 /// tests can drive it with a small `timeoutSeconds` value without
389437 /// waiting for the full 120s production budget.
390438 ///
391- /// Each branch of the race converts thrown errors (including
392- /// `CancellationError` from the loser when we `cancelAll`) into a
393- /// structured `ToolEnvelope` *inside* its child task. That keeps
394- /// `withTaskGroup` non-throwing and prevents the cancelled sibling's
395- /// post-return throw from reaching the caller as the function's
396- /// error — historically the slow-tool case rethrew CancellationError
397- /// and stalled while the group drained.
398- internal nonisolated static func runToolBody(
439+ /// The body and timeout run as unstructured tasks rather than a task
440+ /// group. That is intentional: task-group scope exit drains cancelled
441+ /// children, so a non-cooperative tool body can still delay the timeout
442+ /// response until it returns. The race state resumes the caller once and
443+ /// cancels the loser without waiting for that loser to observe
444+ /// cancellation.
445+ nonisolated static func runToolBody(
399446 _ tool: OsaurusTool ,
400447 argumentsJSON: String ,
401448 timeoutSeconds: TimeInterval
@@ -408,49 +455,33 @@ final class ToolRegistry: ObservableObject {
408455 tool: toolName,
409456 retryable: true
410457 )
411- // Sentinel returned by the cancelled loser branch so the
412- // consumer loop knows to ignore it. Cannot collide with any
413- // legitimate envelope because real envelopes are JSON.
414- let cancelledSentinel = " __osaurus_runToolBody_cancelled__ "
415-
416- return await withTaskGroup ( of: String . self) { group in
417- group. addTask {
458+ return await withCheckedContinuation { continuation in
459+ let race = ToolBodyTimeoutRaceState ( continuation: continuation)
460+ let bodyTask = Task {
418461 do {
419- return try await tool. execute ( argumentsJSON: argumentsJSON)
462+ let result = try await tool. execute ( argumentsJSON: argumentsJSON)
463+ race. finish ( with: result, cancelBody: false , cancelTimeout: true )
420464 } catch is CancellationError {
421- return cancelledSentinel
465+ // A cooperative loser should not overwrite the timeout
466+ // envelope. If cancellation happened before the timeout
467+ // fired, the timeout task remains responsible for the
468+ // structured result.
469+ return
422470 } catch {
423- return ToolEnvelope . fromError ( error, tool: toolName)
471+ let result = ToolEnvelope . fromError ( error, tool: toolName)
472+ race. finish ( with: result, cancelBody: false , cancelTimeout: true )
424473 }
425474 }
426- group . addTask {
427- let nanos = UInt64 ( timeoutSeconds * 1_000_000_000 )
475+ let timeoutTask = Task {
476+ let nanos = UInt64 ( max ( 0 , timeoutSeconds) * 1_000_000_000 )
428477 do {
429478 try await Task . sleep ( nanoseconds: nanos)
430479 } catch {
431- // Cancelled because the body finished first — yield
432- // the sentinel so the caller's first non-sentinel
433- // result wins.
434- return cancelledSentinel
480+ return
435481 }
436- return timeoutEnvelope
437- }
438-
439- // The first non-sentinel result is the winner; cancel the
440- // sibling and let `withTaskGroup` auto-drain on closure
441- // return. The drain is safe because every child branch
442- // converts its own errors into envelope strings — there
443- // are no uncaught throws to surface.
444- for await result in group {
445- if result == cancelledSentinel { continue }
446- group. cancelAll ( )
447- return result
482+ race. finish ( with: timeoutEnvelope, cancelBody: true , cancelTimeout: false )
448483 }
449- return ToolEnvelope . failure (
450- kind: . executionError,
451- message: " Tool ' \( toolName) ' produced no result. " ,
452- tool: toolName
453- )
484+ race. setTasks ( body: bodyTask, timeout: timeoutTask)
454485 }
455486 }
456487
0 commit comments