Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { exec } from "@actions/exec";
import { dump, load } from "js-yaml";
import type { ComposeSpec } from "./compose.js";
import type { Settings } from "./settings.js";
import type { TaskInfo } from "./types.js";
import { mapToObject } from "./utils";

/**
Expand Down Expand Up @@ -274,6 +275,30 @@ export async function getServiceLogs(
}
}

/**
 * Retrieve the tasks belonging to a service.
 *
 * Invokes the `service ps` command through `executeDockerCommand`, requesting
 * JSON output (`--format=json`) without truncation (`--no-trunc`), and parses
 * the line-delimited JSON result into task records.
 *
 * @param serviceId The ID or name of the service
 * @returns The parsed task information objects
 * @throws Error if running the command or parsing its output fails; the
 *   underlying failure is attached as the error's `cause`
 */
export async function listServiceTasks(serviceId: string) {
  const dockerArgs = ["service", "ps", "--format=json", "--no-trunc", serviceId];

  try {
    const rawTasks = await executeDockerCommand(dockerArgs, { silent: true });
    const tasks = parseLineDelimitedJson<TaskInfo>(rawTasks);

    return tasks;
  } catch (cause) {
    // Keep the original failure reachable via `cause` while adding context
    // about which service was being inspected.
    const description = `Failed to list tasks for service "${serviceId}": ${cause}`;

    throw new Error(description, { cause });
  }
}

export async function listSecrets(filters: {
id?: ValueFilter;
name?: ValueFilter;
Expand Down
112 changes: 100 additions & 12 deletions src/monitoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ import * as core from "@actions/core";
import {
getServiceLogs,
listServices,
listServiceTasks,
type Service,
type ServiceWithMetadata,
} from "./engine.js";
import type { Settings } from "./settings.js";
import type { TaskInfo } from "./types.js";
import { sleep } from "./utils.js";

/**
Expand Down Expand Up @@ -63,20 +65,48 @@ export async function monitorDeployment(settings: Readonly<Settings>) {
try {
complete = isServiceUpdateComplete(service);
} catch (error) {
const logs = await getServiceLogs(service.ID, { since: startTime });
const message = error instanceof Error ? error.message : String(error);

core.error(
new Error(
`Service "${serviceIdentifier}" failed to update: ${message}`,
{ cause: error },
),
);
core.error(`Service Details:\n${JSON.stringify(service, null, 2)}`);
// Fetch task details to get actionable error information
let taskFailureDetails: string | undefined;
try {
const tasks = await listServiceTasks(service.ID);
taskFailureDetails = await getTaskFailureDetails(tasks);
} catch (taskError) {
core.debug(
`Failed to fetch task details: ${taskError instanceof Error ? taskError.message : String(taskError)}`,
);
}

// Build comprehensive error message with task details
const errorMessage = taskFailureDetails
? `Service "${serviceIdentifier}" failed to update: ${message}. ${taskFailureDetails}`
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message construction could result in double periods if the original error message already ends with a period. While this is a minor formatting issue, consider normalizing the message format.

For example:

const errorMessage = taskFailureDetails
  ? `Service "${serviceIdentifier}" failed to update: ${message}${message.endsWith('.') ? '' : '.'} ${taskFailureDetails}`
  : `Service "${serviceIdentifier}" failed to update: ${message}`;

Alternatively, you could simply use a space without a period between the messages:

const errorMessage = taskFailureDetails
  ? `Service "${serviceIdentifier}" failed to update: ${message} ${taskFailureDetails}`
  : `Service "${serviceIdentifier}" failed to update: ${message}`;
Suggested change
? `Service "${serviceIdentifier}" failed to update: ${message}. ${taskFailureDetails}`
? `Service "${serviceIdentifier}" failed to update: ${message}${message.endsWith(".") ? "" : "."} ${taskFailureDetails}`

Copilot uses AI. Check for mistakes.
: `Service "${serviceIdentifier}" failed to update: ${message}`;

// Single error annotation with actionable information
core.error(new Error(errorMessage, { cause: error }));

// Fetch logs for summary
const logs = await getServiceLogs(service.ID, { since: startTime });
core.setOutput("service-logs", logs.toString());
core.summary.addHeading("Service Logs", 2);

// Add detailed information to job summary (not as error annotation)
core.summary.addHeading("Service Update Failure Details", 2);
core.summary.addRaw(
`Before the "${serviceIdentifier}" service update failed, the following logs were generated:`,
`Service "${serviceIdentifier}" failed to update.`,
true,
);

// Add task failure details to summary if available
if (taskFailureDetails) {
core.summary.addHeading("Task Failure Reason", 3);
core.summary.addRaw(taskFailureDetails, true);
}

// Add service logs to summary
core.summary.addHeading("Service Logs", 3);
core.summary.addRaw(
`Logs generated before the service update failed:`,
true,
);
core.summary.addTable([
Expand All @@ -99,6 +129,10 @@ export async function monitorDeployment(settings: Readonly<Settings>) {
]),
]);

// Add service details to summary for debugging
core.summary.addHeading("Service Details (Debug)", 3);
core.summary.addCodeBlock(JSON.stringify(service, null, 2), "json");

throw error;
}

Expand Down Expand Up @@ -240,12 +274,66 @@ function resolveFailureReason(
) {
const reason =
{
paused: "Service is paused",
paused:
"Service update paused due to task failure (check task logs for details)",
rollback_started: "Service failed to update and is being rolled back",
rollback_completed: "Service failed to update and was rolled back",
rollback_paused: "Service is paused and is being rolled back",
rollback_paused:
"Service rollback paused due to task failure (check task logs for details)",
unknown: `Service update status '${state}' is unknown`,
}[state] ?? "Unknown failure reason";

return reason + (message ? `: ${message}` : "");
}

/**
* Extract actionable error information from failed tasks
*
* This function analyzes failed tasks to provide meaningful error messages
* that help diagnose why a service update failed. It examines task states,
* error messages, and failure patterns to give actionable feedback.
*
* @param tasks Array of tasks from docker service ps
* @returns A string describing the task failure reason, or undefined if no clear failure
*/
async function getTaskFailureDetails(
tasks: TaskInfo[],
): Promise<string | undefined> {
Comment on lines +299 to +301
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function is declared as async but doesn't use await anywhere and doesn't return a Promise directly. The async keyword is unnecessary here and can be removed.

Change the function signature from:

async function getTaskFailureDetails(
  tasks: TaskInfo[],
): Promise<string | undefined>

to:

function getTaskFailureDetails(
  tasks: TaskInfo[],
): string | undefined

And update the call site at line 74 to remove the await keyword.

Suggested change
async function getTaskFailureDetails(
tasks: TaskInfo[],
): Promise<string | undefined> {
function getTaskFailureDetails(
tasks: TaskInfo[],
): string | undefined {

Copilot uses AI. Check for mistakes.
// Filter to failed or rejected tasks, plus any task whose desired state is "shutdown"
const failedTasks = tasks.filter(
(task) =>
task.Status.State === "failed" ||
task.Status.State === "rejected" ||
task.DesiredState === "shutdown",
Comment on lines +302 to +307
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The filter logic may incorrectly identify non-failed tasks as failures. The condition task.DesiredState === "shutdown" will match any task being shut down, including successfully completed old tasks during a normal rolling update. This could result in misleading error messages that reference successfully completed tasks rather than the actual failed tasks.

Consider refining the filter to only include tasks where Status.State indicates an actual failure (failed, rejected) or where DesiredState is shutdown AND Status.Err is present. For example:

const failedTasks = tasks.filter(
  (task) =>
    task.Status.State === "failed" ||
    task.Status.State === "rejected" ||
    (task.DesiredState === "shutdown" && task.Status.Err)
);

This ensures that shutdown tasks are only considered failures if they have an associated error message.

Suggested change
// Filter to only failed or rejected tasks
const failedTasks = tasks.filter(
(task) =>
task.Status.State === "failed" ||
task.Status.State === "rejected" ||
task.DesiredState === "shutdown",
// Filter to only failed or rejected tasks, or shutdown tasks with an error
const failedTasks = tasks.filter(
(task) =>
task.Status.State === "failed" ||
task.Status.State === "rejected" ||
(task.DesiredState === "shutdown" && task.Status.Err),

Copilot uses AI. Check for mistakes.
);

if (failedTasks.length === 0) {
return undefined;
}

// Get the most recent failed task
const recentFailedTask = failedTasks.sort(
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sort operation mutates the failedTasks array in place. While this works correctly, it's better practice to avoid mutating the filtered array. Consider using toSorted() instead of sort() to create a new sorted array without mutating the original, or make a copy before sorting.

For example:

const recentFailedTask = [...failedTasks].sort(
  (a, b) =>
    new Date(b.UpdatedAt).getTime() - new Date(a.UpdatedAt).getTime(),
)[0];

or:

const recentFailedTask = failedTasks.toSorted(
  (a, b) =>
    new Date(b.UpdatedAt).getTime() - new Date(a.UpdatedAt).getTime(),
)[0];

Note: toSorted() requires Node.js 20+ and TypeScript 5.2+.

Suggested change
const recentFailedTask = failedTasks.sort(
const recentFailedTask = failedTasks.toSorted(

Copilot uses AI. Check for mistakes.
(a, b) =>
new Date(b.UpdatedAt).getTime() - new Date(a.UpdatedAt).getTime(),
)[0];

// Extract error information
const errorParts: string[] = [];

if (recentFailedTask.Status.Err) {
errorParts.push(recentFailedTask.Status.Err);
} else if (recentFailedTask.Status.Message) {
errorParts.push(recentFailedTask.Status.Message);
}

// Add context about the task state
if (recentFailedTask.Status.State === "rejected") {
errorParts.push("task was rejected by the scheduler");
} else if (recentFailedTask.Status.State === "failed") {
errorParts.push("task failed to start or run successfully");
}

return errorParts.length > 0
? `Task ${recentFailedTask.ID.substring(0, 12)} ${errorParts.join(": ")}`
: undefined;
}
Loading