Skip to content

Commit 3b41268

Browse files
committed
feat(agent): runtime operations manager — idempotency-keyed switches, health, hot reload
New `packages/agent/src/runtime/operations/` module:

- `manager.ts` — RuntimeOperationManager, the single-flight gate for provider switches, restarts, and reloads. Replaces the ad-hoc `providerSwitchInProgress` boolean.
- `repository.ts` — operation-state store, surfaces pending/active/done.
- `classifier.ts` + `classifier.test.ts` — decides whether an inbound request is a duplicate of an in-flight op via the idempotency key.
- `health.ts` + `health.test.ts` + `health-checks.ts` — runtime health predicates used by the reload/cold strategies.
- `cold-strategy.ts`, `reload-hot.ts` — strategy implementations for full restart vs hot reload.
- `index.ts` + `types.ts` — module barrel and shared types.

Wires the manager through:

- `api/provider-switch-routes.ts` — reads the Idempotency-Key header and routes through the manager rather than the legacy boolean gate.
- `api/server.ts`, `runtime/restart.ts` — refactored against the new manager; the old single-flight scaffolding is gone.
- `app-core/scripts/dev-platform.mjs`, `app-core/src/api/client-base.ts`, `client-types-core.ts`, `cli/run-main.ts`, `runtime/error-handlers.ts`, `shared/scripts/generate-keywords.mjs` — companion changes the manager required (dev-platform integration, error surfacing, client-side typing for the new op-status responses).

WIP — preserving in a commit so it doesn't sit as uncommitted state on the feature branch. Squash / split as needed in the PR.
1 parent 665fa12 commit 3b41268

20 files changed

Lines changed: 2480 additions & 104 deletions

packages/agent/src/api/provider-switch-routes.ts

Lines changed: 88 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@ import type http from "node:http";
22
import { logger } from "@elizaos/core";
33
import type { ElizaConfig } from "../config/config.js";
44
import { normalizeOnboardingProviderId } from "../contracts/onboarding.js";
5+
import type {
6+
ProviderSwitchIntent,
7+
RuntimeOperationManager,
8+
} from "../runtime/operations/index.js";
59
import type { ReadJsonBodyOptions } from "./http-helpers.js";
610
import {
711
applyOnboardingConnectionConfig,
@@ -27,15 +31,38 @@ export interface ProviderSwitchRouteContext {
2731
) => Promise<T | null>;
2832
saveElizaConfig: (config: ElizaConfig) => void;
2933
scheduleRuntimeRestart: (reason: string) => void;
34+
/**
35+
* Legacy single-flight gate — kept on the context type for now because
36+
* other call sites still set it. This route no longer reads or writes
37+
* the flag; the runtime operation repo's active-op slot is the gate.
38+
*/
3039
providerSwitchInProgress: boolean;
3140
setProviderSwitchInProgress: (value: boolean) => void;
32-
restartRuntime?: (reason: string) => Promise<boolean>;
41+
runtimeOperationManager: RuntimeOperationManager;
3342
}
3443

3544
// ---------------------------------------------------------------------------
3645
// Route handler
3746
// ---------------------------------------------------------------------------
3847

48+
/**
 * Extracts the caller-supplied idempotency key from a set of request headers.
 *
 * Header names are compared case-insensitively against `idempotency-key`.
 * A string value is returned trimmed when it is non-blank; for an array
 * value the first non-blank string entry is returned trimmed. A matching
 * header whose value is blank does not stop the scan, so a later header
 * entry that also matches can still supply the key.
 *
 * @param headers - Incoming request headers to search.
 * @returns The trimmed key, or `undefined` when no usable value is present.
 */
function readIdempotencyKey(
  headers: http.IncomingHttpHeaders,
): string | undefined {
  // Shared predicate: a usable value is a string with non-whitespace content.
  const isNonBlank = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.trim().length > 0;

  for (const headerName of Object.keys(headers)) {
    if (headerName.toLowerCase() !== "idempotency-key") continue;

    const rawValue = headers[headerName];
    if (isNonBlank(rawValue)) {
      return rawValue.trim();
    }
    if (Array.isArray(rawValue)) {
      const firstUsable = rawValue.find(isNonBlank);
      if (firstUsable !== undefined) return firstUsable.trim();
    }
  }
  return undefined;
}
65+
3966
export async function handleProviderSwitchRoutes(
4067
ctx: ProviderSwitchRouteContext,
4168
): Promise<boolean> {
@@ -59,21 +86,14 @@ export async function handleProviderSwitchRoutes(
5986
return true;
6087
}
6188

62-
if (ctx.providerSwitchInProgress) {
63-
error(res, "Provider switch already in progress", 409);
89+
const trimmedApiKey =
90+
typeof body.apiKey === "string" ? body.apiKey.trim() : undefined;
91+
if (trimmedApiKey && trimmedApiKey.length > 512) {
92+
error(res, "API key is too long", 400);
6493
return true;
6594
}
66-
ctx.setProviderSwitchInProgress(true);
6795

6896
try {
69-
const trimmedApiKey =
70-
typeof body.apiKey === "string" ? body.apiKey.trim() : undefined;
71-
if (trimmedApiKey && trimmedApiKey.length > 512) {
72-
ctx.setProviderSwitchInProgress(false);
73-
error(res, "API key is too long", 400);
74-
return true;
75-
}
76-
7797
const config = state.config;
7898
let connection:
7999
| ReturnType<typeof createProviderSwitchConnection>
@@ -97,7 +117,7 @@ export async function handleProviderSwitchRoutes(
97117
process.env.OPENAI_BASE_URL = `${cloudBaseUrl}/api/v1`;
98118
process.env.OPENAI_API_KEY = cloudApiKey;
99119
}
100-
} else if (normalizedProvider) {
120+
} else {
101121
connection = createProviderSwitchConnection({
102122
provider: normalizedProvider,
103123
apiKey: trimmedApiKey,
@@ -106,36 +126,75 @@ export async function handleProviderSwitchRoutes(
106126
? body.primaryModel.trim()
107127
: undefined,
108128
});
109-
} else {
110-
connection = null;
111129
}
112130

113131
if (!connection) {
114-
ctx.setProviderSwitchInProgress(false);
115132
error(res, "Invalid provider", 400);
116133
return true;
117134
}
118135

119136
await applyOnboardingConnectionConfig(config, connection);
120137
ctx.saveElizaConfig(config);
121138

122-
const restartReason = `provider switch to ${normalizedProvider}`;
123-
const restarted = ctx.restartRuntime
124-
? await ctx.restartRuntime(restartReason)
125-
: false;
126-
if (!restarted) {
127-
ctx.scheduleRuntimeRestart(restartReason);
139+
const intent: ProviderSwitchIntent = {
140+
kind: "provider-switch",
141+
provider: normalizedProvider,
142+
apiKey: trimmedApiKey,
143+
primaryModel:
144+
typeof body.primaryModel === "string"
145+
? body.primaryModel.trim()
146+
: undefined,
147+
};
148+
const idempotencyKey = readIdempotencyKey(req.headers);
149+
150+
const outcome = await ctx.runtimeOperationManager.start({
151+
intent,
152+
idempotencyKey,
153+
});
154+
155+
if (outcome.kind === "accepted") {
156+
logger.info(
157+
`[api] Provider switch accepted: provider=${normalizedProvider} op=${outcome.operation.id}`,
158+
);
159+
json(
160+
res,
161+
{
162+
success: true,
163+
provider: normalizedProvider,
164+
restarting: true,
165+
operationId: outcome.operation.id,
166+
},
167+
202,
168+
);
169+
return true;
128170
}
129171

130-
ctx.setProviderSwitchInProgress(false);
172+
if (outcome.kind === "deduped") {
173+
const op = outcome.operation;
174+
logger.info(
175+
`[api] Provider switch deduped: provider=${normalizedProvider} op=${op.id} status=${op.status}`,
176+
);
177+
json(res, {
178+
success: true,
179+
provider: normalizedProvider,
180+
restarting: op.status === "running" || op.status === "pending",
181+
operationId: op.id,
182+
deduped: true,
183+
});
184+
return true;
185+
}
131186

132-
json(res, {
133-
success: true,
134-
provider: normalizedProvider,
135-
restarting: restarted,
136-
});
187+
// outcome.kind === "rejected-busy"
188+
json(
189+
res,
190+
{
191+
error: "Provider switch already in progress",
192+
activeOperationId: outcome.activeOperationId,
193+
},
194+
409,
195+
);
196+
return true;
137197
} catch (err) {
138-
ctx.setProviderSwitchInProgress(false);
139198
logger.error(
140199
`[api] Provider switch failed: ${err instanceof Error ? err.stack : err}`,
141200
);

packages/agent/src/api/server.ts

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,20 @@ import type { PTYService } from "./parse-action-block.js";
205205
import { handlePermissionRoutes } from "./permissions-routes.js";
206206
import { handlePermissionsExtraRoutes } from "./permissions-routes-extra.js";
207207
import { handlePluginRoutes } from "./plugin-routes.js";
208+
import {
209+
type ClassifyContext,
210+
createColdStrategy,
211+
createHotStrategy,
212+
defaultClassifier,
213+
DefaultRuntimeOperationManager,
214+
getDefaultHealthChecker,
215+
getDefaultRepository,
216+
type RuntimeOperationManager,
217+
} from "../runtime/operations/index.js";
218+
import {
219+
resolvePreferredProviderId,
220+
resolvePrimaryModel,
221+
} from "../runtime/eliza.js";
208222
import { handleProviderSwitchRoutes } from "./provider-switch-routes.js";
209223
import { handleRegistryRoutes } from "./registry-routes.js";
210224
import { RegistryService } from "./registry-service.js";
@@ -1044,6 +1058,46 @@ const clearPairing = _clearPairing;
10441058
/** Guard against concurrent provider switch requests (P0 §3). */
10451059
let providerSwitchInProgress = false;
10461060

1061+
/**
1062+
* Lazy per-process runtime operation manager. Constructed on first
1063+
* request because it needs the per-server `state` reference + the
1064+
* `onRestart` closure. Cached so subsequent requests see the same
1065+
* active-op slot and execution chain.
1066+
*/
1067+
let cachedRuntimeOperationManager: RuntimeOperationManager | null = null;
1068+
1069+
/**
 * Returns the module-level runtime operation manager, constructing it on the
 * first call and handing back the cached instance on every later call.
 *
 * Only the `state` and `restartRuntime` passed on the FIRST call are captured
 * by the manager's closures; arguments supplied on later calls are ignored
 * once the cache is populated.
 * NOTE(review): presumably there is exactly one server state per process —
 * confirm, since a second server in the same process would share this manager.
 *
 * @param state - Server state whose `runtime` and `config` are read lazily.
 * @param restartRuntime - Performs a runtime restart; resolves to whether it succeeded.
 * @returns The shared manager instance.
 */
function getOrCreateRuntimeOperationManager(
  state: ServerState,
  restartRuntime: (reason: string) => Promise<boolean>,
): RuntimeOperationManager {
  if (cachedRuntimeOperationManager) {
    return cachedRuntimeOperationManager;
  }

  const repository = getDefaultRepository();
  const healthChecker = getDefaultHealthChecker();

  // Cold path: the strategy receives the current runtime after a successful
  // restart, or null when the restart reports failure.
  const cold = createColdStrategy({
    restartRuntime: async (reason) =>
      (await restartRuntime(reason)) ? state.runtime : null,
  });
  const hot = createHotStrategy({});

  const manager = new DefaultRuntimeOperationManager({
    repository,
    // Read through `state` on every call so the manager sees the latest runtime.
    runtime: () => state.runtime,
    // Built per invocation so classification reflects the config at that moment.
    classifyContext: (): ClassifyContext => ({
      currentProvider: resolvePreferredProviderId(state.config),
      currentPrimaryModel: resolvePrimaryModel(state.config),
    }),
    classifier: defaultClassifier,
    healthChecker,
    strategies: { cold, hot },
  });

  cachedRuntimeOperationManager = manager;
  return manager;
}
1100+
10471101
// PluginConfigMutationRejection, resolvePluginConfigMutationRejections,
10481102
// WalletExportRejection, resolveWalletExportRejection
10491103
// extracted to server-helpers-plugin.ts and server-helpers-wallet.ts respectively.
@@ -1323,6 +1377,10 @@ async function handleRequest(
13231377
};
13241378

13251379
// ── POST /api/provider/switch (extracted to provider-switch-routes.ts) ──
1380+
const runtimeOperationManager = getOrCreateRuntimeOperationManager(
1381+
state,
1382+
restartRuntime,
1383+
);
13261384
if (
13271385
await handleProviderSwitchRoutes({
13281386
req,
@@ -1339,7 +1397,7 @@ async function handleRequest(
13391397
setProviderSwitchInProgress: (v: boolean) => {
13401398
providerSwitchInProgress = v;
13411399
},
1342-
restartRuntime,
1400+
runtimeOperationManager,
13431401
})
13441402
) {
13451403
return;

0 commit comments

Comments
 (0)