Skip to content

Commit cf8dd99

Browse files
committed
fix(desktop): harden lifecycle robustness — external runner, evidence-based updates, unified teardown
- Extract Electron binary + frameworks to ~/.nexu/runtime/nexu-runner.app/ via APFS clone so launchd services never reference the .app bundle, unblocking Finder drag-and-drop reinstalls
- Extract controller sidecar to ~/.nexu/runtime/controller-sidecar/ for the same reason; openclaw sidecar already extracted by existing logic
- All three extractions use staging dir + atomic rename to prevent half-copies
- Version-aware attach: refuse to attach to services from a different app version, build source, userDataPath, or openclawStateDir
- Evidence-based update install: after process sweeps, lsof-check critical paths; only abort if .app bundle or sidecar dirs are actually locked
- Unified dev/packaged teardown: Cmd+Q, window close, and no-window exit all go through teardownLaunchdServices in both modes
- daemon-supervisor circuit breaker: MAX_CONSECUTIVE_RESTARTS=10 with 120s window, emits max_restarts_exceeded reason code
- bootoutService tolerates "already gone" errors
- runtime-ports.json atomic write (tmp + rename)
- Tighter orphan cleanup: prefer launchd label + runtime-ports metadata, fall back to pgrep with node.* prefix and process tree exclusion
1 parent b792fc9 commit cf8dd99

19 files changed

+1593
-176
lines changed

apps/desktop/main/index.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,11 @@ async function runLaunchdColdStart(): Promise<void> {
546546
logColdStart("starting launchd bootstrap");
547547

548548
const isDev = !app.isPackaged;
549-
const paths = resolveLaunchdPaths(app.isPackaged, electronRoot);
549+
const paths = await resolveLaunchdPaths(
550+
app.isPackaged,
551+
electronRoot,
552+
app.getVersion(),
553+
);
550554

551555
const nexuHome = runtimeConfig.paths.nexuHome.replace(
552556
/^~/,
@@ -620,6 +624,11 @@ async function runLaunchdColdStart(): Promise<void> {
620624
openclawExtensionsDir,
621625
skillNodePath,
622626
openclawTmpDir,
627+
appVersion: app.getVersion(),
628+
userDataPath: app.getPath("userData"),
629+
buildSource:
630+
process.env.NEXU_DESKTOP_BUILD_SOURCE ??
631+
(app.isPackaged ? "packaged" : "local-dev"),
623632
});
624633

625634
// Wire launchd-managed units into the orchestrator so the control plane

apps/desktop/main/runtime/daemon-supervisor.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ import type { RuntimeUnitManifest, RuntimeUnitRecord } from "./types";
2929

3030
const LOG_TAIL_LIMIT = 200;
3131
const RECENT_EVENT_LIMIT = 500;
32+
33+
/** Maximum consecutive auto-restart attempts before giving up. */
34+
const MAX_CONSECUTIVE_RESTARTS = 10;
35+
/** If the process ran longer than this before crashing, reset the restart counter. */
36+
const RESTART_WINDOW_MS = 120_000;
3237
let nextRuntimeLogEntryId = 0;
3338
let nextRuntimeActionId = 0;
3439
let nextRuntimeEventCursor = 0;
@@ -362,7 +367,30 @@ export class RuntimeOrchestrator {
362367
if (record.manifest.autoRestart === false) return;
363368
if (record.stoppedByUser) return;
364369

370+
// If the process ran longer than RESTART_WINDOW_MS, it was stable —
371+
// reset the consecutive restart counter.
372+
if (record.startedAt) {
373+
const uptimeMs = Date.now() - new Date(record.startedAt).getTime();
374+
if (uptimeMs > RESTART_WINDOW_MS) {
375+
record.autoRestartAttempts = 0;
376+
}
377+
}
378+
365379
record.autoRestartAttempts += 1;
380+
381+
// Circuit breaker: stop restarting after too many consecutive failures
382+
if (record.autoRestartAttempts > MAX_CONSECUTIVE_RESTARTS) {
383+
setRecordPhase(record, "failed");
384+
record.lastError = `Exceeded ${MAX_CONSECUTIVE_RESTARTS} consecutive restart attempts`;
385+
this.logStateChange(record, {
386+
kind: "lifecycle",
387+
actionId: ensureActionId(record, "auto-restart"),
388+
reasonCode: "max_restarts_exceeded",
389+
message: `auto-restart halted after ${record.autoRestartAttempts} consecutive failures within ${RESTART_WINDOW_MS}ms window`,
390+
});
391+
return;
392+
}
393+
366394
const delayMs = Math.min(
367395
2000 * 2 ** (record.autoRestartAttempts - 1),
368396
MAX_BACKOFF_MS,

apps/desktop/main/runtime/manifests.ts

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,17 +188,44 @@ export function ensurePackagedOpenclawSidecar(
188188
return extractedSidecarRoot;
189189
}
190190

191-
// Clean + extract with retries. Node's rmSync can silently fail on macOS
192-
// (ENOTEMPTY race), so use rm -rf and verify deletion before proceeding.
191+
// Atomic extraction via staging directory: extract to a temporary location,
192+
// verify the critical entry point, then atomically swap into the final path.
193+
// This prevents half-extracted directories if the process is killed mid-extract.
194+
const stagingRoot = `${extractedSidecarRoot}.staging`;
193195
const MAX_RETRIES = 3;
196+
197+
// Clean up any leftover staging directory from a previous interrupted attempt
198+
if (existsSync(stagingRoot)) {
199+
execFileSync("rm", ["-rf", stagingRoot]);
200+
}
201+
194202
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
195203
try {
204+
if (existsSync(stagingRoot)) {
205+
execFileSync("rm", ["-rf", stagingRoot]);
206+
}
207+
mkdirSync(stagingRoot, { recursive: true });
208+
execFileSync("tar", ["-xzf", archivePath, "-C", stagingRoot]);
209+
210+
// Verify critical entry point exists in staging
211+
const stagingEntry = path.resolve(
212+
stagingRoot,
213+
"node_modules/openclaw/openclaw.mjs",
214+
);
215+
if (!existsSync(stagingEntry)) {
216+
throw new Error(
217+
`Extraction verification failed: ${stagingEntry} not found`,
218+
);
219+
}
220+
221+
// Write stamp inside staging
222+
writeFileSync(path.resolve(stagingRoot, ".archive-stamp"), archiveStamp);
223+
224+
// Atomic swap: remove old → rename staging to final
196225
if (existsSync(extractedSidecarRoot)) {
197226
execFileSync("rm", ["-rf", extractedSidecarRoot]);
198227
}
199-
mkdirSync(extractedSidecarRoot, { recursive: true });
200-
execFileSync("tar", ["-xzf", archivePath, "-C", extractedSidecarRoot]);
201-
writeFileSync(stampPath, archiveStamp);
228+
execFileSync("mv", [stagingRoot, extractedSidecarRoot]);
202229
break;
203230
} catch (err) {
204231
if (attempt === MAX_RETRIES - 1) throw err;

apps/desktop/main/services/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export {
1818

1919
export {
2020
bootstrapWithLaunchd,
21+
checkCriticalPathsLocked,
2122
stopAllServices,
2223
teardownLaunchdServices,
2324
ensureNexuProcessesDead,

0 commit comments

Comments
 (0)