(children); + const refs = await this.spawnBulk(children); + refs.forEach((ref) => { + // eslint-disable-next-line no-param-reassign + ref.defaultSignal = this.abortController.signal; + }); + return refs; } /** @@ -465,6 +492,9 @@ export class Context{ options?: ChildRunOpts ): Promise { const run = await this.spawn(workflow, input, options); + // Ensure waiting for the child result aborts when this task is cancelled. + // eslint-disable-next-line no-param-reassign + run.defaultSignal = this.abortController.signal; return run.output; } @@ -482,6 +512,7 @@ export class Context
{ options?: ChildRunOpts ): Promise > { const ref = await this.spawn(workflow, input, options); + ref.defaultSignal = this.abortController.signal; return ref; } @@ -586,6 +617,7 @@ export class Context { options?: ChildRunOpts; }> ): Promise []> { + this.throwIfCancelled(); const { workflowRunId, taskRunExternalId } = this.action; const workflowRuns = workflows.map(({ workflow, input, options }) => { @@ -608,11 +640,16 @@ export class Context { ); } + // `signal` must never be sent over the wire. + const optsWithoutSignal: Omit & { signal?: never } = { ...opts }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + delete (optsWithoutSignal as any).signal; + const resp = { workflowName: name, input, options: { - ...opts, + ...optsWithoutSignal, parentId: workflowRunId, parentTaskRunExternalId: taskRunExternalId, childIndex: this.spawnIndex, @@ -663,6 +700,7 @@ export class Context { input: Q, options?: ChildRunOpts ): Promise > { + this.throwIfCancelled(); const { workflowRunId, taskRunExternalId } = this.action; let workflowName: string = ''; @@ -733,6 +771,7 @@ export class DurableContext extends Context { * @returns A promise that resolves with the event that satisfied the conditions. */ async waitFor(conditions: Conditions | Conditions[]): Promise > { + this.throwIfCancelled(); const pbConditions = conditionsToPb(Render(ConditionAction.CREATE, conditions)); // eslint-disable-next-line no-plusplus @@ -743,13 +782,13 @@ export class DurableContext extends Context { sleepConditions: pbConditions.sleepConditions, userEventConditions: pbConditions.userEventConditions, }); - - const listener = this.v1._v0.durableListener.subscribe({ - taskId: this.action.taskRunExternalId, - signalKey: key, - }); - - const event = await listener.get(); + const event = await this.v1._v0.durableListener.result( + { + taskId: this.action.taskRunExternalId, + signalKey: key, + }, + { signal: this.abortController.signal } + ); // Convert event.data from Uint8Array to string if needed const eventData = diff --git a/sdks/typescript/src/v1/client/worker/worker-cancel-supervision.test.ts b/sdks/typescript/src/v1/client/worker/worker-cancel-supervision.test.ts new file mode 100644 index 0000000000..6ff90fbd11 --- /dev/null +++ b/sdks/typescript/src/v1/client/worker/worker-cancel-supervision.test.ts @@ -0,0 +1,94 @@ +import { V1Worker } from '@hatchet/v1/client/worker/worker-internal'; +import HatchetPromise from '@util/hatchet-promise/hatchet-promise'; + +describe('V1Worker handleCancelStepRun cancellation supervision', () => { + beforeEach(() => { + jest.useFakeTimers(); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + it('logs warnings after threshold and grace period, then returns', async () => { + const logger = { + info: jest.fn(), + warn: jest.fn(), + debug: jest.fn(), + error: jest.fn(), + }; + + const taskExternalId = 'task-1'; + + // Use the real HatchetPromise behavior: cancel rejects the wrapper immediately, + // while the underlying work (`inner`) continues. + const inner = new Promise (() => { + // never resolves + }); + const future = new HatchetPromise(inner); + const originalCancel = future.cancel; + const cancelSpy = jest.fn((reason: any) => originalCancel(reason)); + future.cancel = cancelSpy; + + const ctx = { + abortController: new AbortController(), + }; + + const fakeThis: any = { + logger, + client: { + config: { + cancellation_warning_threshold: 300, + cancellation_grace_period: 1000, + }, + }, + cancellingTaskRuns: new Set(), + futures: { [taskExternalId]: future }, + contexts: { [taskExternalId]: ctx }, + }; + + const action: any = { taskRunExternalId: taskExternalId }; + + const p = V1Worker.prototype.handleCancelStepRun.call(fakeThis, action); + + await jest.advanceTimersByTimeAsync(1500); + await p; + + expect(ctx.abortController.signal.aborted).toBe(true); + expect(cancelSpy).toHaveBeenCalled(); + expect(logger.warn).toHaveBeenCalled(); + + expect(fakeThis.futures[taskExternalId]).toBeUndefined(); + expect(fakeThis.contexts[taskExternalId]).toBeUndefined(); + }); + + it('suppresses "was cancelled" debug log when cancellation is supervised', async () => { + const logger = { + info: jest.fn(), + warn: jest.fn(), + debug: jest.fn(), + error: jest.fn(), + }; + + const taskExternalId = 'task-2'; + + const fakeThis: any = { + logger, + cancellingTaskRuns: new Set([taskExternalId]), + }; + + // Reproduce the log suppression logic from the step execution path: + // we only log "was cancelled" if the cancellation isn't currently supervised. + const maybeLog = (e: any) => { + const message = e?.message || String(e); + if (message.includes('Cancelled')) { + if (!fakeThis.cancellingTaskRuns.has(taskExternalId)) { + fakeThis.logger.debug(`Task run ${taskExternalId} was cancelled`); + } + } + }; + + maybeLog(new Error('Cancelled by worker')); + expect(logger.debug).not.toHaveBeenCalled(); + }); +}); diff --git a/sdks/typescript/src/v1/client/worker/worker-internal.ts b/sdks/typescript/src/v1/client/worker/worker-internal.ts index c5dc58c94d..75ecebc417 100644 --- a/sdks/typescript/src/v1/client/worker/worker-internal.ts +++ b/sdks/typescript/src/v1/client/worker/worker-internal.ts @@ -19,7 +19,7 @@ import { DesiredWorkerLabels, WorkflowConcurrencyOpts, } from '@hatchet/protoc/workflows'; -import { Logger } from '@hatchet/util/logger'; +import { actionMap, Logger, taskRunLog } from '@hatchet/util/logger'; import { WebhookWorkerCreateRequest } from '@clients/rest/generated/data-contracts'; import { BaseWorkflowDeclaration, WorkflowDefinition, HatchetClient } from '@hatchet/v1'; import { CreateTaskOpts } from '@hatchet/protoc/v1/workflows'; @@ -36,6 +36,8 @@ import { zodToJsonSchema } from 'zod-to-json-schema'; import { WorkerLabels } from '@hatchet/clients/dispatcher/dispatcher-client'; import { CreateStep, mapRateLimit, StepRunFunction } from '@hatchet/step'; import { applyNamespace } from '@hatchet/util/apply-namespace'; +import sleep from '@hatchet/util/sleep'; +import { throwIfAborted } from '@hatchet/util/abort-error'; import { Context, DurableContext } from './context'; import { parentRunContextManager } from '../../parent-run-context-vars'; import { HealthServer, workerStatus, type WorkerStatus } from './health-server'; @@ -182,21 +184,6 @@ export class V1Worker { }; } - getHandler(workflows: Workflow[]) { - throw new Error('Not implemented'); - // TODO v1 - // for (const workflow of workflows) { - // const wf: Workflow = { - // ...workflow, - // id: this.client.config.namespace + workflow.id, - // }; - - // this.registerActions(wf); - // } - - // return new WebhookHandler(this, workflows); - } - async registerWebhook(webhook: WebhookWorkerCreateRequest) { return this.client._v0.admin.registerWebhook({ ...webhook }); } @@ -534,7 +521,7 @@ export class V1Worker { } async handleStartStepRun(action: Action) { - const { actionId, taskRunExternalId } = action; + const { actionId, taskRunExternalId, taskName } = action; try { // Note: we always use a DurableContext since its a superset of the Context class @@ -550,13 +537,20 @@ export class V1Worker { } const run = async () => { - parentRunContextManager.setContext({ - parentId: action.workflowRunId, - parentTaskRunExternalId: taskRunExternalId, - childIndex: 0, - desiredWorkerId: this.workerId || '', - }); - return step(context); + return parentRunContextManager.runWithContext( + { + parentId: action.workflowRunId, + parentTaskRunExternalId: taskRunExternalId, + childIndex: 0, + desiredWorkerId: this.workerId || '', + signal: context.abortController.signal, + }, + () => { + // Precheck: if cancellation already happened, don't execute user code. + throwIfAborted(context.abortController.signal); + return step(context); + } + ); }; const success = async (result: any) => { @@ -565,7 +559,7 @@ export class V1Worker { return; } - this.logger.info(`Task run ${taskRunExternalId} succeeded`); + this.logger.info(taskRunLog(taskName, taskRunExternalId, 'completed')); // Send the action event to the dispatcher const event = this.getStepActionEvent( @@ -616,7 +610,7 @@ export class V1Worker { return; } - this.logger.error(`Task run ${taskRunExternalId} failed: ${error.message}`); + this.logger.error(taskRunLog(taskName, taskRunExternalId, `failed: ${error.message}`)); if (error.stack) { this.logger.error(error.stack); @@ -652,6 +646,19 @@ export class V1Worker { await failure(e); return; } + + // Postcheck: user code may swallow AbortError; don't report completion after cancellation. + // If we reached this point and the signal is aborted, the task likely caught/ignored cancellation. + if (context.abortController.signal.aborted) { + this.logger.warn( + `Cancellation: task run ${taskRunExternalId} returned after cancellation was signaled. ` + + `This usually means an AbortError was caught and not propagated. ` + + `See https://docs.hatchet.run/home/cancellation` + ); + return; + } + throwIfAborted(context.abortController.signal); + await success(result); })() ); @@ -673,9 +680,8 @@ export class V1Worker { await future.promise; } catch (e: any) { const message = e?.message || String(e); - if (message.includes('Cancelled')) { - this.logger.debug(`Task run ${taskRunExternalId} was cancelled`); - } else { + // TODO is this cased correctly... + if (!message.includes('Cancelled')) { this.logger.error( `Could not wait for task run ${taskRunExternalId} to finish. ` + `See https://docs.hatchet.run/home/cancellation for best practices on handling cancellation: `, @@ -689,7 +695,7 @@ export class V1Worker { } async handleStartGroupKeyRun(action: Action) { - const { actionId, getGroupKeyRunId, taskRunExternalId } = action; + const { actionId, getGroupKeyRunId, taskRunExternalId, taskName } = action; this.logger.error( 'Concurrency Key Functions have been deprecated and will be removed in a future release. Use Concurrency Expressions instead.' @@ -721,7 +727,7 @@ export class V1Worker { }; const success = (result: any) => { - this.logger.info(`Task run ${taskRunExternalId} succeeded`); + this.logger.info(taskRunLog(taskName, taskRunExternalId, 'completed')); try { // Send the action event to the dispatcher @@ -743,7 +749,7 @@ export class V1Worker { }; const failure = (error: any) => { - this.logger.error(`Task run ${key} failed: ${error.message}`); + this.logger.error(taskRunLog(taskName, taskRunExternalId, `failed: ${error.message}`)); try { // Send the action event to the dispatcher @@ -824,26 +830,73 @@ export class V1Worker { } async handleCancelStepRun(action: Action) { - const { taskRunExternalId } = action; + const { taskRunExternalId, taskName } = action; + try { - this.logger.info(`Cancelling task run ${taskRunExternalId}`); const future = this.futures[taskRunExternalId]; const context = this.contexts[taskRunExternalId]; if (context && context.abortController) { - context.abortController.abort('Cancelled by worker'); + context.abortController.abort('Cancelled by worker'); // TODO this reason is nonsensical } if (future) { - future.promise.catch(() => { - this.logger.info(`Cancelled task run ${taskRunExternalId}`); - }); - future.cancel('Cancelled by worker'); - await future.promise; + const start = Date.now(); + const warningThresholdMs = this.client.config.cancellation_warning_threshold ?? 300; + const gracePeriodMs = this.client.config.cancellation_grace_period ?? 1000; + const warningMs = Math.max(0, warningThresholdMs); + const graceMs = Math.max(0, gracePeriodMs); + + // Ensure cancelling this future doesn't create an unhandled rejection in cases + // where the main action handler isn't currently awaiting `future.promise`. + future.promise.catch(() => undefined); + + // Cancel the future (rejects the wrapper); user code must still cooperate with AbortSignal. + future.cancel('Cancelled by worker'); // TODO this reason is nonsensical + + // Track completion of the underlying work (not the cancelable wrapper). + // Ensure this promise never throws into our supervision flow. + const completion = (future.inner ?? future.promise).catch(() => undefined); + + // Wait until warning threshold, then log if still running. + if (warningMs > 0) { + const winner = await Promise.race([ + completion.then(() => 'done' as const), + sleep(warningMs).then(() => 'warn' as const), + ]); + + if (winner === 'warn') { + const milliseconds = Date.now() - start; + this.logger.warn( + `Cancellation: task run ${taskRunExternalId} has not cancelled after ${milliseconds}ms. Consider checking for blocking operations. ` + + `See https://docs.hatchet.run/home/cancellation` + ); + } + } + + // Wait until grace period (total), then log if still running. + const elapsedMs = Date.now() - start; + const remainingMs = graceMs - elapsedMs; + const winner = await Promise.race([ + completion.then(() => 'done' as const), + sleep(Math.max(0, remainingMs)).then(() => 'grace' as const), + ]); + + if (winner === 'done') { + this.logger.info(taskRunLog(taskName, taskRunExternalId, 'cancelled')); + } else { + const totalElapsedMs = Date.now() - start; + this.logger.error( + `Cancellation: task run ${taskRunExternalId} still running after cancellation grace period ` + + `${totalElapsedMs}ms.\n` + + `JavaScript cannot force-kill user code; see: https://docs.hatchet.run/home/cancellation` + ); + } } } catch (e: any) { - // Expected: the promise rejects when cancelled - this.logger.debug(`Task run ${taskRunExternalId} cancellation completed`); + this.logger.error( + `Cancellation: error while supervising cancellation for task run ${taskRunExternalId}: ${e?.message || e}` + ); } finally { delete this.futures[taskRunExternalId]; delete this.contexts[taskRunExternalId]; @@ -924,9 +977,9 @@ export class V1Worker { this.logger.info(`Worker ${this.name} listening for actions`); for await (const action of generator) { - this.logger.info( - `Worker ${this.name} received action ${action.actionId}:${action.actionType}` - ); + const receivedType = actionMap(action.actionType); + + this.logger.info(taskRunLog(action.taskName, action.taskRunExternalId, `${receivedType}`)); void this.handleAction(action); } diff --git a/sdks/typescript/src/v1/declaration.ts b/sdks/typescript/src/v1/declaration.ts index 090b86dc67..be3e15bd99 100644 --- a/sdks/typescript/src/v1/declaration.ts +++ b/sdks/typescript/src/v1/declaration.ts @@ -9,6 +9,7 @@ import { } from '@hatchet/clients/rest/generated/data-contracts'; import { Workflow as WorkflowV0 } from '@hatchet/workflow'; import { z } from 'zod'; +import { throwIfAborted } from '@hatchet/util/abort-error'; import { IHatchetClient } from './client/client.interface'; import { CreateWorkflowTaskOpts, @@ -312,7 +313,6 @@ export class BaseWorkflowDeclaration< // set the parent run context const parentRunContext = parentRunContextManager.getContext(); - parentRunContextManager.incrementChildIndex(Array.isArray(input) ? input.length : 1); if (!parentRunContext && (options?.childKey || options?.sticky)) { this.client.admin.logger.warn( @@ -320,8 +320,22 @@ export class BaseWorkflowDeclaration< ); } + const inheritedSignal = parentRunContext?.signal; + + // Precheck: if we're being called from a cancelled parent task, do not enqueue more work. + // The signal is inherited from the parent task's `ctx.abortController.signal`. + throwIfAborted(inheritedSignal, { + isTrigger: true, + context: parentRunContext?.parentTaskRunExternalId + ? `task run ${parentRunContext.parentTaskRunExternalId}` + : undefined, + warn: (message) => this.client!.admin.logger.warn(message), + }); + + parentRunContextManager.incrementChildIndex(Array.isArray(input) ? input.length : 1); + const runOpts = { - ...options, + ...(options ?? {}), parentId: parentRunContext?.parentId, parentTaskRunExternalId: parentRunContext?.parentTaskRunExternalId, childIndex: parentRunContext?.childIndex, @@ -357,6 +371,9 @@ export class BaseWorkflowDeclaration< // eslint-disable-next-line no-param-reassign ref._standaloneTaskName = _standaloneTaskName; } + // Ensure result subscriptions inherit cancellation if no signal is provided explicitly. + // eslint-disable-next-line no-param-reassign + ref.defaultSignal = inheritedSignal; res.push(ref); }); return res; @@ -368,6 +385,7 @@ export class BaseWorkflowDeclaration< res._standaloneTaskName = _standaloneTaskName; } + res.defaultSignal = inheritedSignal; return res; } @@ -432,10 +450,16 @@ export class BaseWorkflowDeclaration< throw UNBOUND_ERR; } + // If called from within a cancelled parent task, do not enqueue scheduled work. + throwIfAborted(parentRunContextManager.getContext()?.signal, { + isTrigger: true, + warn: (message) => this.client!.admin.logger.warn(message), + }); + const scheduled = this.client.scheduled.create(this.definition.name, { triggerAt: enqueueAt, input: input as JsonObject, - ...options, + ...(options ?? {}), }); return scheduled; @@ -474,10 +498,16 @@ export class BaseWorkflowDeclaration< throw UNBOUND_ERR; } + // If called from within a cancelled parent task, do not enqueue cron work. + throwIfAborted(parentRunContextManager.getContext()?.signal, { + isTrigger: true, + warn: (message) => this.client!.admin.logger.warn(message), + }); + const cronDef = this.client.crons.create(this.definition.name, { expression, input: input as JsonObject, - ...options, + ...(options ?? {}), additionalMetadata: options?.additionalMetadata, name, }); diff --git a/sdks/typescript/src/v1/examples/on_event/event.e2e.ts b/sdks/typescript/src/v1/examples/on_event/event.e2e.ts index 612082037c..3d081e0a14 100644 --- a/sdks/typescript/src/v1/examples/on_event/event.e2e.ts +++ b/sdks/typescript/src/v1/examples/on_event/event.e2e.ts @@ -56,6 +56,7 @@ xdescribe('events-e2e', () => { const maxAttempts = 15; const eventToRuns: Record = {}; + // eslint-disable-next-line no-constant-condition while (true) { console.log('Waiting for event runs to complete...'); if (attempts > maxAttempts) { diff --git a/sdks/typescript/src/v1/parent-run-context-vars.ts b/sdks/typescript/src/v1/parent-run-context-vars.ts index e02d45ccdb..8518d23f8a 100644 --- a/sdks/typescript/src/v1/parent-run-context-vars.ts +++ b/sdks/typescript/src/v1/parent-run-context-vars.ts @@ -8,6 +8,12 @@ export interface ParentRunContext { parentTaskRunExternalId: string; desiredWorkerId: string; childIndex?: number; + + /** + * (optional) AbortSignal inherited by nested `run()` calls. + * Used to cancel local "wait for result" subscriptions when the parent task is cancelled. + */ + signal?: AbortSignal; } export class ParentRunContextManager { @@ -17,6 +23,15 @@ export class ParentRunContextManager { this.storage = new AsyncLocalStorage (); } + runWithContext (opts: ParentRunContext, fn: () => T): T { + return this.storage.run( + { + ...opts, + }, + fn + ); + } + setContext(opts: ParentRunContext): void { this.storage.enterWith({ ...opts,