Skip to content

Commit 1f39d1a

Browse files
authored
Add channel health diagnostics and gateway recovery fixes (#855)
1 parent 6acd8ac commit 1f39d1a

22 files changed

+1868
-52
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,6 @@ artifacts/
6666
docs/pr-session-notes-*.md
6767

6868
.cursor/
69+
.claude/
6970
.pnpm-store/
7071
package-lock.json

electron/api/routes/channels.ts

Lines changed: 84 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ import {
3333
import {
3434
computeChannelRuntimeStatus,
3535
pickChannelRuntimeStatus,
36+
type ChannelConnectionStatus,
3637
type ChannelRuntimeAccountSnapshot,
38+
type GatewayHealthState,
3739
} from '../../utils/channel-status';
3840
import {
3941
OPENCLAW_WECHAT_CHANNEL_TYPE,
@@ -65,6 +67,8 @@ import {
6567
normalizeWhatsAppMessagingTarget,
6668
} from '../../utils/openclaw-sdk';
6769
import { logger } from '../../utils/logger';
70+
import { buildGatewayHealthSummary } from '../../utils/gateway-health';
71+
import type { GatewayHealthSummary } from '../../gateway/manager';
6872

6973
// listWhatsAppDirectory*FromConfig were removed from openclaw's public exports
7074
// in 2026.3.23-1. No-op stubs; WhatsApp target picker uses session discovery.
@@ -405,18 +409,43 @@ interface ChannelAccountView {
405409
running: boolean;
406410
linked: boolean;
407411
lastError?: string;
408-
status: 'connected' | 'connecting' | 'disconnected' | 'error';
412+
status: ChannelConnectionStatus;
413+
statusReason?: string;
409414
isDefault: boolean;
410415
agentId?: string;
411416
}
412417

413418
interface ChannelAccountsView {
414419
channelType: string;
415420
defaultAccountId: string;
416-
status: 'connected' | 'connecting' | 'disconnected' | 'error';
421+
status: ChannelConnectionStatus;
422+
statusReason?: string;
417423
accounts: ChannelAccountView[];
418424
}
419425

426+
/**
 * Expose the module-level bookkeeping for the `channels.status` gateway RPC.
 *
 * Both timestamps are epoch milliseconds captured with `Date.now()` by
 * `buildChannelAccountsView`: one after the RPC resolves, the other when it
 * throws. Either field is `undefined` until the matching outcome has happened
 * at least once.
 *
 * @returns A fresh object holding the latest success and failure timestamps.
 */
export function getChannelStatusDiagnostics(): {
  lastChannelsStatusOkAt?: number;
  lastChannelsStatusFailureAt?: number;
} {
  const okAt = lastChannelsStatusOkAt;
  const failureAt = lastChannelsStatusFailureAt;
  return {
    lastChannelsStatusOkAt: okAt,
    lastChannelsStatusFailureAt: failureAt,
  };
}
435+
436+
/**
 * Translate the gateway health state into the optional overlay passed to the
 * channel status helpers.
 *
 * @param gatewayHealthState - Health state reported for the gateway.
 * @returns `undefined` when the gateway is `'healthy'`; otherwise the state
 *   itself, unchanged.
 */
function gatewayHealthStateForChannels(
  gatewayHealthState: GatewayHealthState,
): GatewayHealthState | undefined {
  if (gatewayHealthState === 'healthy') {
    return undefined;
  }
  return gatewayHealthState;
}
441+
442+
/**
 * Choose the reason string attached to a channel or account status.
 *
 * @param gatewayHealth - Gateway health summary whose `reasons` list is consulted.
 * @param fallbackReason - Reason used when the first entry is missing or empty.
 * @returns The first entry of `gatewayHealth.reasons` when it is truthy,
 *   otherwise `fallbackReason`.
 */
function overlayStatusReason(
  gatewayHealth: GatewayHealthSummary,
  fallbackReason: string,
): string {
  const primaryReason = gatewayHealth.reasons[0];
  if (primaryReason) {
    return primaryReason;
  }
  return fallbackReason;
}
448+
420449
function buildGatewayStatusSnapshot(status: GatewayChannelStatusPayload | null): string {
421450
if (!status?.channelAccounts) return 'none';
422451
const entries = Object.entries(status.channelAccounts);
@@ -480,11 +509,13 @@ type DirectoryEntry = {
480509
const CHANNEL_TARGET_CACHE_TTL_MS = 60_000;
481510
const CHANNEL_TARGET_CACHE_ENABLED = process.env.VITEST !== 'true';
482511
const channelTargetCache = new Map<string, { expiresAt: number; targets: ChannelTargetOptionView[] }>();
512+
let lastChannelsStatusOkAt: number | undefined;
513+
let lastChannelsStatusFailureAt: number | undefined;
483514

484-
async function buildChannelAccountsView(
515+
export async function buildChannelAccountsView(
485516
ctx: HostApiContext,
486517
options?: { probe?: boolean },
487-
): Promise<ChannelAccountsView[]> {
518+
): Promise<{ channels: ChannelAccountsView[]; gatewayHealth: GatewayHealthSummary }> {
488519
const startedAt = Date.now();
489520
// Read config once and share across all sub-calls (was 5 readFile calls before).
490521
const openClawConfig = await readOpenClawConfig();
@@ -507,17 +538,32 @@ async function buildChannelAccountsView(
507538
{ probe },
508539
probe ? 5000 : 8000,
509540
);
541+
lastChannelsStatusOkAt = Date.now();
510542
logger.info(
511543
`[channels.accounts] channels.status probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - rpcStartedAt} snapshot=${buildGatewayStatusSnapshot(gatewayStatus)}`
512544
);
513545
} catch {
514546
const probe = options?.probe === true;
547+
lastChannelsStatusFailureAt = Date.now();
515548
logger.warn(
516549
`[channels.accounts] channels.status probe=${probe ? '1' : '0'} failed after ${Date.now() - startedAt}ms`
517550
);
518551
gatewayStatus = null;
519552
}
520553

554+
const gatewayDiagnostics = ctx.gatewayManager.getDiagnostics?.() ?? {
555+
consecutiveHeartbeatMisses: 0,
556+
consecutiveRpcFailures: 0,
557+
};
558+
const gatewayHealth = buildGatewayHealthSummary({
559+
status: ctx.gatewayManager.getStatus(),
560+
diagnostics: gatewayDiagnostics,
561+
lastChannelsStatusOkAt,
562+
lastChannelsStatusFailureAt,
563+
platform: process.platform,
564+
});
565+
const gatewayHealthState = gatewayHealthStateForChannels(gatewayHealth.state);
566+
521567
const channelTypes = new Set<string>([
522568
...configuredChannels,
523569
...Object.keys(configuredAccounts),
@@ -566,7 +612,9 @@ async function buildChannelAccountsView(
566612
const accounts: ChannelAccountView[] = accountIds.map((accountId) => {
567613
const runtime = runtimeAccounts.find((item) => item.accountId === accountId);
568614
const runtimeSnapshot: ChannelRuntimeAccountSnapshot = runtime ?? {};
569-
const status = computeChannelRuntimeStatus(runtimeSnapshot);
615+
const status = computeChannelRuntimeStatus(runtimeSnapshot, {
616+
gatewayHealthState,
617+
});
570618
return {
571619
accountId,
572620
name: runtime?.name || accountId,
@@ -576,6 +624,11 @@ async function buildChannelAccountsView(
576624
linked: runtime?.linked === true,
577625
lastError: typeof runtime?.lastError === 'string' ? runtime.lastError : undefined,
578626
status,
627+
statusReason: status === 'degraded'
628+
? overlayStatusReason(gatewayHealth, 'gateway_degraded')
629+
: status === 'error'
630+
? 'runtime_error'
631+
: undefined,
579632
isDefault: accountId === defaultAccountId,
580633
agentId: agentsSnapshot.channelAccountOwners[`${rawChannelType}:${accountId}`],
581634
};
@@ -585,10 +638,32 @@ async function buildChannelAccountsView(
585638
return left.accountId.localeCompare(right.accountId);
586639
});
587640

641+
const visibleAccountSnapshots: ChannelRuntimeAccountSnapshot[] = accounts.map((account) => ({
642+
connected: account.connected,
643+
running: account.running,
644+
linked: account.linked,
645+
lastError: account.lastError,
646+
}));
647+
const hasRuntimeError = visibleAccountSnapshots.some((account) => typeof account.lastError === 'string' && account.lastError.trim())
648+
|| Boolean(channelSummary?.error?.trim() || channelSummary?.lastError?.trim());
649+
const baseGroupStatus = pickChannelRuntimeStatus(visibleAccountSnapshots, channelSummary);
650+
const groupStatus = !gatewayStatus && ctx.gatewayManager.getStatus().state === 'running'
651+
? 'degraded'
652+
: gatewayHealthState && !hasRuntimeError && baseGroupStatus === 'connected'
653+
? 'degraded'
654+
: pickChannelRuntimeStatus(visibleAccountSnapshots, channelSummary, {
655+
gatewayHealthState,
656+
});
657+
588658
channels.push({
589659
channelType: uiChannelType,
590660
defaultAccountId,
591-
status: pickChannelRuntimeStatus(runtimeAccounts, channelSummary),
661+
status: groupStatus,
662+
statusReason: !gatewayStatus && ctx.gatewayManager.getStatus().state === 'running'
663+
? 'channels_status_timeout'
664+
: groupStatus === 'degraded'
665+
? overlayStatusReason(gatewayHealth, 'gateway_degraded')
666+
: undefined,
592667
accounts,
593668
});
594669
}
@@ -597,7 +672,7 @@ async function buildChannelAccountsView(
597672
logger.info(
598673
`[channels.accounts] response probe=${options?.probe === true ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${sorted.map((item) => `${item.channelType}:${item.status}`).join(',')}`
599674
);
600-
return sorted;
675+
return { channels: sorted, gatewayHealth };
601676
}
602677

603678
function buildChannelTargetLabel(baseLabel: string, value: string): string {
@@ -1193,8 +1268,8 @@ export async function handleChannelRoutes(
11931268
try {
11941269
const probe = url.searchParams.get('probe') === '1';
11951270
logger.info(`[channels.accounts] request probe=${probe ? '1' : '0'}`);
1196-
const channels = await buildChannelAccountsView(ctx, { probe });
1197-
sendJson(res, 200, { success: true, channels });
1271+
const { channels, gatewayHealth } = await buildChannelAccountsView(ctx, { probe });
1272+
sendJson(res, 200, { success: true, channels, gatewayHealth });
11981273
} catch (error) {
11991274
sendJson(res, 500, { success: false, error: String(error) });
12001275
}

electron/api/routes/diagnostics.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { open } from 'node:fs/promises';
2+
import { join } from 'node:path';
3+
import type { IncomingMessage, ServerResponse } from 'http';
4+
import { logger } from '../../utils/logger';
5+
import { getOpenClawConfigDir } from '../../utils/paths';
6+
import { buildGatewayHealthSummary } from '../../utils/gateway-health';
7+
import type { HostApiContext } from '../context';
8+
import { sendJson } from '../route-utils';
9+
import { buildChannelAccountsView, getChannelStatusDiagnostics } from './channels';
10+
11+
/** Number of trailing log lines returned when the caller does not specify one. */
const DEFAULT_TAIL_LINES = 200;

/**
 * Read the last `tailLines` lines of a text file without loading the whole file.
 *
 * The file is scanned backwards in 64 KiB chunks until more than `tailLines`
 * newline bytes have been seen or the start of the file is reached. The raw
 * bytes are decoded as UTF-8 only once, after all chunks have been joined, so
 * a multi-byte character that straddles a chunk boundary is never split into
 * replacement characters.
 *
 * A trailing newline counts as starting a final empty line, so a file that
 * ends with `\n` yields `tailLines - 1` non-empty lines followed by `''`.
 *
 * @param filePath - Path of the file to read.
 * @param tailLines - Maximum number of lines to return. Values below 1 are
 *   clamped to 1, fractional values are floored, and `NaN` falls back to
 *   `DEFAULT_TAIL_LINES`.
 * @returns The tail of the file, or `''` when the file is empty or cannot be
 *   opened or read (best-effort: I/O errors are deliberately swallowed).
 */
async function readTail(filePath: string, tailLines = DEFAULT_TAIL_LINES): Promise<string> {
  // NaN would make every `<=` comparison false and silently return an empty tail.
  const requestedLines = Number.isNaN(tailLines) ? DEFAULT_TAIL_LINES : tailLines;
  const safeTailLines = Math.max(1, Math.floor(requestedLines));
  try {
    const file = await open(filePath, 'r');
    try {
      const stat = await file.stat();
      if (stat.size === 0) return '';

      const chunkSize = 64 * 1024;
      const chunks: Buffer[] = [];
      let position = stat.size;
      let newlineCount = 0;

      // Read one newline more than requested so the (possibly partial) first
      // line of the collected window can be discarded by the slice below.
      while (position > 0 && newlineCount <= safeTailLines) {
        const bytesToRead = Math.min(chunkSize, position);
        position -= bytesToRead;
        const buffer = Buffer.allocUnsafe(bytesToRead);
        const { bytesRead } = await file.read(buffer, 0, bytesToRead, position);
        const chunk = buffer.subarray(0, bytesRead);
        // 0x0A never appears inside a multi-byte UTF-8 sequence, so counting
        // raw bytes matches counting '\n' in the decoded text.
        for (const byte of chunk) {
          if (byte === 0x0a) newlineCount += 1;
        }
        chunks.unshift(chunk);
      }

      // Decode once over the contiguous bytes to keep multi-byte characters intact.
      const content = Buffer.concat(chunks).toString('utf-8');
      const lines = content.split('\n');
      return lines.length <= safeTailLines ? content : lines.slice(-safeTailLines).join('\n');
    } finally {
      await file.close();
    }
  } catch {
    return '';
  }
}
44+
45+
/**
 * Route handler for `GET /api/diagnostics/gateway-snapshot`.
 *
 * Responds with a JSON snapshot made of the channel accounts view (built
 * without probing), the gateway status merged with its health summary, and
 * the tails of the app log plus the gateway stdout/stderr logs found under
 * `<openclaw config dir>/logs`. Any failure while building the snapshot is
 * reported as a 500 with the stringified error.
 *
 * @param req - Incoming HTTP request; only its method is inspected.
 * @param res - Response the JSON payload is written to.
 * @param url - Parsed request URL; only its pathname is inspected.
 * @param ctx - Host API context providing the gateway manager.
 * @returns `true` when the request matched this route (whether it succeeded
 *   or failed), `false` when the request is for something else.
 */
export async function handleDiagnosticsRoutes(
  req: IncomingMessage,
  res: ServerResponse,
  url: URL,
  ctx: HostApiContext,
): Promise<boolean> {
  const isSnapshotRequest =
    url.pathname === '/api/diagnostics/gateway-snapshot' && req.method === 'GET';
  if (!isSnapshotRequest) {
    return false;
  }

  try {
    const { channels } = await buildChannelAccountsView(ctx, { probe: false });

    // Older gateway managers may not implement getDiagnostics; treat that as zero failures.
    const diagnostics = ctx.gatewayManager.getDiagnostics?.() ?? {
      consecutiveHeartbeatMisses: 0,
      consecutiveRpcFailures: 0,
    };
    const { lastChannelsStatusOkAt, lastChannelsStatusFailureAt } = getChannelStatusDiagnostics();

    // Health-summary fields are spread last so they win over same-named status fields.
    const gateway = {
      ...ctx.gatewayManager.getStatus(),
      ...buildGatewayHealthSummary({
        status: ctx.gatewayManager.getStatus(),
        diagnostics,
        lastChannelsStatusOkAt,
        lastChannelsStatusFailureAt,
        platform: process.platform,
      }),
    };

    const openClawDir = getOpenClawConfigDir();
    // Timestamp is taken before the log reads, matching the payload's field order.
    const capturedAt = Date.now();
    const clawxLogTail = await logger.readLogFile(DEFAULT_TAIL_LINES);
    const gatewayLogTail = await readTail(join(openClawDir, 'logs', 'gateway.log'));
    const gatewayErrLogTail = await readTail(join(openClawDir, 'logs', 'gateway.err.log'));

    sendJson(res, 200, {
      capturedAt,
      platform: process.platform,
      gateway,
      channels,
      clawxLogTail,
      gatewayLogTail,
      gatewayErrLogTail,
    });
  } catch (error) {
    sendJson(res, 500, { success: false, error: String(error) });
  }
  return true;
}

electron/api/server.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { handleSkillRoutes } from './routes/skills';
1515
import { handleFileRoutes } from './routes/files';
1616
import { handleSessionRoutes } from './routes/sessions';
1717
import { handleCronRoutes } from './routes/cron';
18+
import { handleDiagnosticsRoutes } from './routes/diagnostics';
1819
import { sendJson, setCorsHeaders, requireJsonContentType } from './route-utils';
1920

2021
type RouteHandler = (
@@ -35,6 +36,7 @@ const routeHandlers: RouteHandler[] = [
3536
handleFileRoutes,
3637
handleSessionRoutes,
3738
handleCronRoutes,
39+
handleDiagnosticsRoutes,
3840
handleLogRoutes,
3941
handleUsageRoutes,
4042
];

0 commit comments

Comments
 (0)