diff --git a/src/engine.ts b/src/engine.ts
index 1aabd84..35b2950 100644
--- a/src/engine.ts
+++ b/src/engine.ts
@@ -3,6 +3,7 @@ import { exec } from "@actions/exec";
 import { dump, load } from "js-yaml";
 import type { ComposeSpec } from "./compose.js";
 import type { Settings } from "./settings.js";
+import type { TaskInfo } from "./types.js";
 import { mapToObject } from "./utils";
 
 /**
@@ -274,6 +275,34 @@ export async function getServiceLogs(
   }
 }
 
+/**
+ * List tasks for a service
+ *
+ * This function retrieves the list of tasks for a service, which includes
+ * information about task states, errors, and failure reasons.
+ *
+ * NOTE(review): `docker service ps --format=json` prints the CLI's formatted
+ * task columns; confirm that `TaskInfo` matches that output shape.
+ *
+ * @param serviceId The ID or name of the service
+ * @returns Array of task information objects
+ * @throws Error wrapping the original failure as `cause` if listing fails
+ */
+export async function listServiceTasks(serviceId: string): Promise<TaskInfo[]> {
+  try {
+    const output = await executeDockerCommand(
+      ["service", "ps", "--format=json", "--no-trunc", serviceId],
+      { silent: true },
+    );
+
+    return parseLineDelimitedJson<TaskInfo>(output);
+  } catch (cause) {
+    throw new Error(`Failed to list tasks for service "${serviceId}": ${cause}`, {
+      cause,
+    });
+  }
+}
+
 export async function listSecrets(filters: {
   id?: ValueFilter;
   name?: ValueFilter;
diff --git a/src/monitoring.ts b/src/monitoring.ts
index 010ce74..b633648 100644
--- a/src/monitoring.ts
+++ b/src/monitoring.ts
@@ -2,10 +2,12 @@ import * as core from "@actions/core";
 import {
   getServiceLogs,
   listServices,
+  listServiceTasks,
   type Service,
   type ServiceWithMetadata,
 } from "./engine.js";
 import type { Settings } from "./settings.js";
+import type { TaskInfo } from "./types.js";
 import { sleep } from "./utils.js";
 
 /**
@@ -63,20 +65,48 @@ export async function monitorDeployment(settings: Readonly<Settings>) {
       try {
         complete = isServiceUpdateComplete(service);
       } catch (error) {
-        const logs = await getServiceLogs(service.ID, { since: startTime });
         const message = error instanceof Error ? error.message : String(error);
 
-        core.error(
-          new Error(
-            `Service "${serviceIdentifier}" failed to update: ${message}`,
-            { cause: error },
-          ),
-        );
-        core.error(`Service Details:\n${JSON.stringify(service, null, 2)}`);
+        // Fetch task details to get actionable error information
+        let taskFailureDetails: string | undefined;
+        try {
+          const tasks = await listServiceTasks(service.ID);
+          taskFailureDetails = await getTaskFailureDetails(tasks);
+        } catch (taskError) {
+          core.debug(
+            `Failed to fetch task details: ${taskError instanceof Error ? taskError.message : String(taskError)}`,
+          );
+        }
+
+        // Build comprehensive error message with task details
+        const errorMessage = taskFailureDetails
+          ? `Service "${serviceIdentifier}" failed to update: ${message}. ${taskFailureDetails}`
+          : `Service "${serviceIdentifier}" failed to update: ${message}`;
+
+        // Single error annotation with actionable information
+        core.error(new Error(errorMessage, { cause: error }));
+
+        // Fetch logs for summary
+        const logs = await getServiceLogs(service.ID, { since: startTime });
         core.setOutput("service-logs", logs.toString());
-        core.summary.addHeading("Service Logs", 2);
+
+        // Add detailed information to job summary (not as error annotation)
+        core.summary.addHeading("Service Update Failure Details", 2);
         core.summary.addRaw(
-          `Before the "${serviceIdentifier}" service update failed, the following logs were generated:`,
+          `Service "${serviceIdentifier}" failed to update.`,
+          true,
+        );
+
+        // Add task failure details to summary if available
+        if (taskFailureDetails) {
+          core.summary.addHeading("Task Failure Reason", 3);
+          core.summary.addRaw(taskFailureDetails, true);
+        }
+
+        // Add service logs to summary
+        core.summary.addHeading("Service Logs", 3);
+        core.summary.addRaw(
+          `Logs generated before the service update failed:`,
           true,
         );
         core.summary.addTable([
@@ -99,6 +129,10 @@ export async function monitorDeployment(settings: Readonly<Settings>) {
           ]),
         ]);
 
+        // Add service details to summary for debugging
+        core.summary.addHeading("Service Details (Debug)", 3);
+        core.summary.addCodeBlock(JSON.stringify(service, null, 2), "json");
+
         throw error;
       }
 
@@ -240,12 +274,66 @@ function resolveFailureReason(
 ) {
   const reason =
     {
-      paused: "Service is paused",
+      paused:
+        "Service update paused due to task failure (check task logs for details)",
       rollback_started: "Service failed to update and is being rolled back",
       rollback_completed: "Service failed to update and was rolled back",
-      rollback_paused: "Service is paused and is being rolled back",
+      rollback_paused:
+        "Service rollback paused due to task failure (check task logs for details)",
       unknown: `Service update status '${state}' is unknown`,
     }[state] ?? "Unknown failure reason";
 
   return reason + (message ? `: ${message}` : "");
 }
+
+/**
+ * Extract actionable error information from failed tasks
+ *
+ * This function analyzes failed tasks to provide meaningful error messages
+ * that help diagnose why a service update failed. It examines task states,
+ * error messages, and failure patterns to give actionable feedback.
+ *
+ * @param tasks Array of tasks from docker service ps
+ * @returns A string describing the task failure reason, or undefined if no clear failure
+ */
+async function getTaskFailureDetails(
+  tasks: TaskInfo[],
+): Promise<string | undefined> {
+  // Filter to only failed or rejected tasks. `DesiredState === "shutdown"` is
+  // not checked: it also matches tasks that were stopped on purpose (e.g.
+  // replaced by the update) and would report them as failures.
+  const failedTasks = tasks.filter(
+    (task) =>
+      task.Status.State === "failed" || task.Status.State === "rejected",
+  );
+
+  if (failedTasks.length === 0) {
+    return undefined;
+  }
+
+  // Get the most recent failed task
+  const recentFailedTask = failedTasks.sort(
+    (a, b) =>
+      new Date(b.UpdatedAt).getTime() - new Date(a.UpdatedAt).getTime(),
+  )[0];
+
+  // Extract error information
+  const errorParts: string[] = [];
+
+  if (recentFailedTask.Status.Err) {
+    errorParts.push(recentFailedTask.Status.Err);
+  } else if (recentFailedTask.Status.Message) {
+    errorParts.push(recentFailedTask.Status.Message);
+  }
+
+  // Add context about the task state
+  if (recentFailedTask.Status.State === "rejected") {
+    errorParts.push("task was rejected by the scheduler");
+  } else if (recentFailedTask.Status.State === "failed") {
+    errorParts.push("task failed to start or run successfully");
+  }
+
+  return errorParts.length > 0
+    ? `Task ${recentFailedTask.ID.substring(0, 12)} ${errorParts.join(": ")}`
+    : undefined;
+}
diff --git a/tests/engine.test.ts b/tests/engine.test.ts
index 07edcd6..792d74b 100644
--- a/tests/engine.test.ts
+++ b/tests/engine.test.ts
@@ -455,6 +455,67 @@ services:
     });
   });
 
+  describe("listServiceTasks", () => {
+    it("should list tasks for a service", async () => {
+      const mockTasks = [
+        {
+          ID: "task1abc",
+          ServiceID: "service123",
+          NodeID: "node1",
+          DesiredState: "running",
+          Status: { State: "running" },
+          CreatedAt: "2024-01-01T00:00:00Z",
+          UpdatedAt: "2024-01-01T00:00:01Z",
+        },
+        {
+          ID: "task2def",
+          ServiceID: "service123",
+          NodeID: "node1",
+          DesiredState: "shutdown",
+          Status: { State: "failed", Err: "container exited with code 1" },
+          CreatedAt: "2024-01-01T00:00:00Z",
+          UpdatedAt: "2024-01-01T00:00:02Z",
+        },
+      ];
+      const mockOutput = mockTasks.map((t) => JSON.stringify(t)).join("\n");
+
+      mockedExec.mockImplementation(async (_0, _1, options) => {
+        options?.listeners?.stdout?.(Buffer.from(mockOutput));
+        return 0;
+      });
+
+      const tasks = await engine.listServiceTasks("service123");
+
+      expect(mockedExec).toHaveBeenCalledWith(
+        "docker",
+        ["service", "ps", "--format=json", "--no-trunc", "service123"],
+        expect.objectContaining({ silent: true }),
+      );
+      expect(tasks).toHaveLength(2);
+      expect(tasks[0].ID).toBe("task1abc");
+      expect(tasks[1].Status.Err).toBe("container exited with code 1");
+    });
+
+    it("should handle empty task list", async () => {
+      mockedExec.mockImplementation(async (_0, _1, options) => {
+        options?.listeners?.stdout?.(Buffer.from(""));
+        return 0;
+      });
+
+      const tasks = await engine.listServiceTasks("service123");
+
+      expect(tasks).toHaveLength(0);
+    });
+
+    it("should throw error on exec failure", async () => {
+      mockedExec.mockRejectedValue(new Error("Docker error"));
+
+      await expect(engine.listServiceTasks("service123")).rejects.toThrowError(
+        /Failed to list tasks for service/,
+      );
+    });
+  });
+
   describe("listSecrets", () => {
     it("should list secrets and parse labels", async () => {
       const mockSecret = {
diff --git a/tests/monitoring.test.ts b/tests/monitoring.test.ts
index 5ab7df9..1177d3c 100644
--- a/tests/monitoring.test.ts
+++ b/tests/monitoring.test.ts
@@ -438,6 +438,81 @@ describe("Monitoring", () => {
     );
   });
 
+  it("should include task failure details in error message", async () => {
+    vi.useFakeTimers();
+
+    const mockTasks = [
+      {
+        ID: "task1abcdefghijklmnop",
+        ServiceID: "web_service",
+        NodeID: "node1",
+        DesiredState: "shutdown",
+        Labels: {},
+        Status: {
+          State: "failed",
+          Err: "task: non-zero exit (1)",
+          Message: "started",
+        },
+        Spec: {},
+        CreatedAt: "2024-01-01T00:00:00Z",
+        UpdatedAt: "2024-01-01T00:00:02Z",
+      },
+    ];
+
+    const serviceHistory = [
+      [
+        {
+          ID: "web_service",
+          Spec: { Name: "test" },
+          UpdateStatus: { State: "paused", Message: "update paused" },
+        } as ServiceWithMetadata,
+      ],
+    ];
+
+    vi.spyOn(engine, "listServices")
+      .mockResolvedValueOnce(serviceHistory[0]);
+    vi.spyOn(engine, "listServiceTasks").mockResolvedValueOnce(mockTasks);
+    vi.spyOn(engine, "getServiceLogs").mockResolvedValueOnce([]);
+
+    // Should include task details in error
+    // noinspection JSVoidFunctionReturnValueUsed
+    const promise = expect(monitorDeployment(settings)).rejects.toThrow();
+    await vi.runAllTimersAsync();
+    await promise;
+
+    // Verify listServiceTasks was called to fetch task details
+    expect(engine.listServiceTasks).toHaveBeenCalledWith("web_service");
+  });
+
+  it("should handle task fetch errors gracefully", async () => {
+    vi.useFakeTimers();
+
+    const serviceHistory = [
+      [
+        {
+          ID: "web_service",
+          Spec: { Name: "test" },
+          UpdateStatus: { State: "paused" },
+        } as ServiceWithMetadata,
+      ],
+    ];
+
+    vi.spyOn(engine, "listServices")
+      .mockResolvedValueOnce(serviceHistory[0]);
+    vi.spyOn(engine, "listServiceTasks").mockRejectedValueOnce(
+      new Error("Failed to fetch tasks"),
+    );
+    vi.spyOn(engine, "getServiceLogs").mockResolvedValueOnce([]);
+
+    // Should still throw error even if task fetch fails
+    // noinspection JSVoidFunctionReturnValueUsed
+    const promise = expect(monitorDeployment(settings)).rejects.toThrowError();
+    await vi.runAllTimersAsync();
+    await promise;
+
+    expect(engine.listServiceTasks).toHaveBeenCalled();
+  });
+
   it("should not monitor deployment if `monitor` is false", async () => {
     vi.spyOn(engine, "listServices");
 