Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions server/src/__tests__/heartbeat-process-recovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1486,37 +1486,34 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
expect(wakeups).toHaveLength(1);
});

it("re-enqueues continuation when the latest automatic continuation succeeded without closing the issue", async () => {
const { agentId, issueId, runId } = await seedStrandedIssueFixture({
it("escalates to blocked when the latest automatic continuation succeeded without closing the issue", async () => {
const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({
status: "in_progress",
runStatus: "succeeded",
retryReason: "issue_continuation_needed",
});
const heartbeat = heartbeatService(db);

const result = await heartbeat.reconcileStrandedAssignedIssues();
expect(result.continuationRequeued).toBe(1);
expect(result.escalated).toBe(0);
expect(result.continuationRequeued).toBe(0);
expect(result.escalated).toBe(1);
expect(result.issueIds).toEqual([issueId]);

const issue = await db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? null);
expect(issue?.status).toBe("in_progress");

const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(0);
expect(issue?.status).toBe("blocked");

const runs = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.agentId, agentId));
expect(runs).toHaveLength(2);
await expectStrandedRecoveryArtifacts({
companyId,
agentId,
issueId,
runId,
previousStatus: "in_progress",
retryReason: "issue_continuation_needed",
});

const retryRun = runs.find((row) => row.id !== runId);
expect(retryRun?.id).toBeTruthy();
expect((retryRun?.contextSnapshot as Record<string, unknown>)?.retryReason).toBe("issue_continuation_needed");
if (retryRun) {
await waitForRunToSettle(heartbeat, retryRun.id);
}
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried continuation");
});

it("does not reconcile user-assigned work through the agent stranded-work recovery path", async () => {
Expand Down
11 changes: 7 additions & 4 deletions server/src/services/heartbeat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -994,18 +994,21 @@ function summarizeRunFailureForIssueComment(
return null;
}

function didAutomaticRecoveryFail(
function didAutomaticRecoveryExhaust(
latestRun: Pick<typeof heartbeatRuns.$inferSelect, "status" | "contextSnapshot"> | null,
expectedRetryReason: "assignment_recovery" | "issue_continuation_needed",
) {
if (!latestRun) return false;

const latestContext = parseObject(latestRun.contextSnapshot);
const latestRetryReason = readNonEmptyString(latestContext.retryReason);
// A succeeded recovery run is also considered exhausted: call sites verify there is no
// active execution path before reaching this check, so a run that exited successfully
// without re-establishing one left the issue stranded and should trigger escalation.
return (
latestRetryReason === expectedRetryReason &&
UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
latestRun.status as (typeof UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
latestRun.status as (typeof HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
)
);
}
Expand Down Expand Up @@ -6031,7 +6034,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {})
const shouldBlockImmediately =
!recoveryAgentInvokable ||
!recoveryAgent ||
didAutomaticRecoveryFail(run, issue.status === "todo" ? "assignment_recovery" : "issue_continuation_needed");
didAutomaticRecoveryExhaust(run, issue.status === "todo" ? "assignment_recovery" : "issue_continuation_needed");
if (shouldBlockImmediately) {
const comment = buildImmediateExecutionPathRecoveryComment({
status: issue.status as "todo" | "in_progress",
Expand Down
14 changes: 9 additions & 5 deletions server/src/services/recovery/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import { isAutomaticRecoverySuppressedByPauseHold } from "./pause-hold-guard.js"

const EXECUTION_PATH_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const;
Comment thread
greptile-apps[bot] marked this conversation as resolved.
Outdated
const HEARTBEAT_RUN_TERMINAL_STATUSES = ["succeeded", "failed", "cancelled", "timed_out"] as const;
const ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_MIN_STALE_MS = 24 * 60 * 60 * 1000;
export const ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS = 60 * 60 * 1000;
export const ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS = 4 * 60 * 60 * 1000;
Expand Down Expand Up @@ -111,17 +112,20 @@ function summarizeRunFailureForIssueComment(run: LatestIssueRun) {
return null;
}

function didAutomaticRecoveryFail(
function didAutomaticRecoveryExhaust(
latestRun: LatestIssueRun,
expectedRetryReason: "assignment_recovery" | "issue_continuation_needed",
) {
if (!latestRun) return false;

const latestContext = parseObject(latestRun.contextSnapshot);
const latestRetryReason = readNonEmptyString(latestContext.retryReason);
// A succeeded recovery run is also considered exhausted: call sites verify there is no
// active execution path before reaching this check, so a run that exited successfully
// without re-establishing one left the issue stranded and should trigger escalation.
return latestRetryReason === expectedRetryReason &&
UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
latestRun.status as (typeof UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
latestRun.status as (typeof HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
);
}

Expand Down Expand Up @@ -1454,7 +1458,7 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
continue;
}

if (didAutomaticRecoveryFail(latestRun, "assignment_recovery")) {
if (didAutomaticRecoveryExhaust(latestRun, "assignment_recovery")) {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
Expand Down Expand Up @@ -1495,7 +1499,7 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
result.skipped += 1;
continue;
}
if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) {
if (didAutomaticRecoveryExhaust(latestRun, "issue_continuation_needed")) {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
Expand Down
Loading