elizaOS
diff --git a/‎packages/agent/docs/capability-router-remote-plugins.md‎
Lines changed: 5 additions & 3 deletions b/‎packages/agent/docs/capability-router-remote-plugins.md‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎packages/app-core/src/benchmark/server.ts‎
Lines changed: 127 additions & 32 deletions b/‎packages/app-core/src/benchmark/server.ts‎
Lines changed: 127 additions & 32 deletions
diff --git a/‎packages/benchmarks/orchestrator/adapters.py‎
Lines changed: 0 additions & 11 deletions b/‎packages/benchmarks/orchestrator/adapters.py‎
Lines changed: 0 additions & 11 deletions
diff --git a/‎packages/core/src/capabilities/index.test.ts‎
Lines changed: 71 additions & 0 deletions b/‎packages/core/src/capabilities/index.test.ts‎
Lines changed: 71 additions & 0 deletions
@@ -427,12 +427,14 @@ When multiple endpoints are configured:
   `capabilityEndpointId`, and the materialized plugin carries that endpoint id
   on every remote plugin RPC.
 - Outbound remote route RPC calls validate callable HTTP methods, local absolute
-  app paths, and safe request headers before crossing the capability boundary.
-  Outbound remote asset RPC calls validate safe asset paths before dispatch.
+  app paths, safe request headers, and safe query keys/values before crossing
+  the capability boundary. Outbound remote asset RPC calls validate safe asset
+  paths before dispatch.
 - Outbound remote plugin RPC calls validate module ids and target identifiers
   such as action, provider, evaluator, event, model, service, lifecycle, and app
   bridge names before crossing the capability boundary. Service method calls use
-  the same identifier and reserved-name rules as service manifests.
+  the same identifier and reserved-name rules as service manifests. Explicit
+  endpoint ids on routed RPC calls are also validated before dispatch.
 - Remote route and app-bridge route calls do not copy local or remote
   authorization, cookie, API-key, or auth-token headers across the boundary.
   Endpoint authentication stays in the capability-router transport layer instead
 
@@ -91,6 +91,63 @@ autoWireCerebras();
 const BENCH_TOKEN = process.env.ELIZA_BENCH_TOKEN?.trim() || null;
 const OPENROUTER_PLUGIN_MODULE: string = "@elizaos/plugin-openrouter";
 
+// Retry tuning for OpenAI-compatible benchmark requests. Each value is read
+// from the named environment variable through envPositiveInt, which returns
+// the literal default when the variable is unset or rejected.
+
+// Upper bound on request attempts; the retry loops count from 1 up to this.
+const OPENAI_COMPAT_MAX_ATTEMPTS = envPositiveInt(
+  "CEREBRAS_BENCH_MAX_ATTEMPTS",
+  4,
+);
+// Base delay in milliseconds, doubled per attempt when no Retry-After applies.
+const OPENAI_COMPAT_RETRY_BASE_MS = envPositiveInt(
+  "CEREBRAS_BENCH_RETRY_BASE_MS",
+  4000,
+);
+// Cap in milliseconds applied to Retry-After delays and to the backoff term
+// (random jitter is added on top of the capped backoff).
+const OPENAI_COMPAT_RETRY_MAX_MS = envPositiveInt(
+  "CEREBRAS_BENCH_RETRY_MAX_MS",
+  30000,
+);
+
+/**
+ * Reads a strictly positive integer from the environment variable `name`.
+ *
+ * The whole value (ignoring surrounding whitespace) must be decimal digits with
+ * an optional leading `+`. `Number.parseInt` on its own accepts any numeric
+ * prefix, which would silently turn "30s" into 30 or "1e3" into 1, so values
+ * with trailing characters are treated as malformed.
+ *
+ * @param name - Environment variable to read.
+ * @param fallback - Value returned when the variable is unset, empty,
+ *   malformed, zero, or too large to be a safe integer.
+ * @returns The parsed integer, or `fallback`.
+ */
+function envPositiveInt(name: string, fallback: number): number {
+  const raw = process.env[name]?.trim();
+  if (!raw || !/^\+?\d+$/.test(raw)) return fallback;
+  const parsed = Number.parseInt(raw, 10);
+  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
+}
+
+/** Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((done) => {
+    setTimeout(done, ms);
+  });
+}
+
+/**
+ * Reports whether an HTTP status should be retried: 408, 409, 429, or any
+ * status of 500 and above.
+ */
+function isRetryableOpenAiCompatibleStatus(status: number): boolean {
+  if (status >= 500) return true;
+  switch (status) {
+    case 408:
+    case 409:
+    case 429:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Computes how long to wait before retrying a failed request.
+ *
+ * A `Retry-After` header is honoured when present, either as a non-negative
+ * number of seconds or as a date; both forms are capped at
+ * `OPENAI_COMPAT_RETRY_MAX_MS`. Without a usable header the delay is
+ * exponential backoff from `OPENAI_COMPAT_RETRY_BASE_MS`, capped at the same
+ * maximum, plus up to 249 ms of random jitter added after the cap.
+ *
+ * @param response - The failed response whose headers are inspected.
+ * @param attempt - 1-based number of the attempt that just failed.
+ * @returns Delay in milliseconds.
+ */
+function openAiCompatibleRetryDelayMs(
+  response: Response,
+  attempt: number,
+): number {
+  const retryAfter = response.headers.get("retry-after")?.trim();
+  if (retryAfter) {
+    // Match the whole value as a number so "0" means "retry immediately"
+    // rather than being handed to Date.parse, and so a digit-led date string
+    // is not misread as a number of seconds.
+    if (/^\d+(?:\.\d+)?$/.test(retryAfter)) {
+      return Math.min(
+        Math.ceil(Number(retryAfter) * 1000),
+        OPENAI_COMPAT_RETRY_MAX_MS,
+      );
+    }
+    const timestamp = Date.parse(retryAfter);
+    if (Number.isFinite(timestamp)) {
+      // A date in the past yields a zero delay instead of a negative one.
+      return Math.min(
+        Math.max(timestamp - Date.now(), 0),
+        OPENAI_COMPAT_RETRY_MAX_MS,
+      );
+    }
+  }
+  return (
+    Math.min(
+      OPENAI_COMPAT_RETRY_BASE_MS * 2 ** Math.max(attempt - 1, 0),
+      OPENAI_COMPAT_RETRY_MAX_MS,
+    ) + Math.floor(Math.random() * 250)
+  );
+}
+
 function normalizeBenchmarkTaskAgentEnv(): void {
   const benchmarkRequested = process.env.BENCHMARK_TASK_AGENT?.trim();
   const requested =
@@ -390,26 +447,45 @@ async function callOpenAiCompatibleActionCalling(params: {
 } | null> {
   const config = resolveOpenAiCompatibleActionCallingConfig();
   if (!config) return null;
-  const response = await fetch(chatCompletionsUrl(config.baseUrl), {
-    method: "POST",
-    headers: {
-      Authorization: `Bearer ${config.apiKey}`,
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify({
-      model: config.model,
-      messages: params.messages,
-      tools: params.tools,
-      tool_choice:
-        params.toolChoice === "none"
-          ? "none"
-          : params.toolChoice === "auto"
-            ? "required"
-            : params.toolChoice || "required",
-      max_tokens: params.maxTokens,
-      temperature: params.temperature,
-    }),
+  const requestBody = JSON.stringify({
+    model: config.model,
+    messages: params.messages,
+    tools: params.tools,
+    tool_choice:
+      params.toolChoice === "none"
+        ? "none"
+        : params.toolChoice === "auto"
+          ? "required"
+          : params.toolChoice || "required",
+    max_tokens: params.maxTokens,
+    temperature: params.temperature,
   });
+  let response: Response | null = null;
+  for (let attempt = 1; attempt <= OPENAI_COMPAT_MAX_ATTEMPTS; attempt += 1) {
+    response = await fetch(chatCompletionsUrl(config.baseUrl), {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${config.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: requestBody,
+    });
+    if (
+      response.ok ||
+      !isRetryableOpenAiCompatibleStatus(response.status) ||
+      attempt >= OPENAI_COMPAT_MAX_ATTEMPTS
+    ) {
+      break;
+    }
+    const delayMs = openAiCompatibleRetryDelayMs(response, attempt);
+    elizaLogger.warn(
+      `[bench] OpenAI-compatible action-calling request failed (${response.status}); retrying in ${delayMs}ms (attempt ${attempt}/${OPENAI_COMPAT_MAX_ATTEMPTS})`,
+    );
+    await sleep(delayMs);
+  }
+  if (!response) {
+    throw new Error("OpenAI-compatible action-calling request was not sent");
+  }
   if (!response.ok) {
     const body = await response.text().catch(() => "");
     throw new Error(
@@ -443,20 +519,39 @@ async function callOpenAiCompatibleText(params: {
 } | null> {
   const config = resolveOpenAiCompatibleActionCallingConfig();
   if (!config) return null;
-  const response = await fetch(chatCompletionsUrl(config.baseUrl), {
-    method: "POST",
-    headers: {
-      Authorization: `Bearer ${config.apiKey}`,
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify({
-      model: config.model,
-      messages: [{ role: "user", content: params.prompt }],
-      max_tokens: params.maxTokens,
-      temperature: params.temperature,
-      ...(config.provider === "cerebras" ? { reasoning_effort: "low" } : {}),
-    }),
+  const requestBody = JSON.stringify({
+    model: config.model,
+    messages: [{ role: "user", content: params.prompt }],
+    max_tokens: params.maxTokens,
+    temperature: params.temperature,
+    ...(config.provider === "cerebras" ? { reasoning_effort: "low" } : {}),
   });
+  let response: Response | null = null;
+  for (let attempt = 1; attempt <= OPENAI_COMPAT_MAX_ATTEMPTS; attempt += 1) {
+    response = await fetch(chatCompletionsUrl(config.baseUrl), {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${config.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: requestBody,
+    });
+    if (
+      response.ok ||
+      !isRetryableOpenAiCompatibleStatus(response.status) ||
+      attempt >= OPENAI_COMPAT_MAX_ATTEMPTS
+    ) {
+      break;
+    }
+    const delayMs = openAiCompatibleRetryDelayMs(response, attempt);
+    elizaLogger.warn(
+      `[bench] OpenAI-compatible text request failed (${response.status}); retrying in ${delayMs}ms (attempt ${attempt}/${OPENAI_COMPAT_MAX_ATTEMPTS})`,
+    );
+    await sleep(delayMs);
+  }
+  if (!response) {
+    throw new Error("OpenAI-compatible text request was not sent");
+  }
   if (!response.ok) {
     const body = await response.text().catch(() => "");
     throw new Error(
 
@@ -94,21 +94,10 @@ def _json_score(path: Path) -> ScoreSummary:
     # current CLI path intentionally fails closed because it has no
     # transcript-in/artifact-out native compactor API.
     "compactbench": ("eliza", "hermes"),
-    # Vending-Bench currently has heuristic/direct providers and an Eliza TS
-    # bridge path. Hermes/OpenClaw labels would still exercise the Eliza bridge
-    # or a non-agent provider, so publish only the concrete Eliza harness row.
-    "vending_bench": ("eliza",),
-    # HyperliquidBench plan generation is wired to the Eliza TS bridge plus a
-    # deterministic Python smoke path. Hermes/OpenClaw labels do not yet select
-    # distinct harness implementations.
-    "hyperliquid_bench": ("eliza",),
     # LOCA has real Eliza and Hermes proxy paths. OpenClaw's current LOCA path
     # is an explicit provider-level smoke mode, not native OpenClaw agent
     # parity, so keep it out of cross-agent result matrices.
     "loca_bench": ("eliza", "hermes"),
-    # The lifecycle benchmark's real bridge mode starts the Eliza benchmark
-    # server; simulate mode is deterministic and not a harness comparison.
-    "orchestrator_lifecycle": ("eliza",),
     # ConfigBench currently has an in-process Eliza handler plus oracle/mock
     # handlers. Hermes/OpenClaw rows were previously scored against the
     # Perfect oracle fallback, which is not a real harness comparison.
 
@@ -757,6 +757,77 @@ describe("capability router", () => {
 		expect(calls).toEqual([]);
 	});
 
+	it("rejects outbound remote plugin route calls with unsafe query keys", async () => {
+		// Records each runtime method the router tries to invoke; it must stay
+		// empty because the call is expected to be rejected before dispatch.
+		const dispatched: string[] = [];
+		const router = new RuntimeBrokerCapabilityRouter({
+			invokeRuntime: async (method) => {
+				dispatched.push(method);
+				return { status: 200 };
+			},
+		});
+
+		// The query key carries a CR/LF sequence followed by a header-like name.
+		const routeCall = router.plugin.callRoute({
+			moduleId: "remote-weather",
+			method: "GET",
+			path: "/weather/sf",
+			query: { "city\r\nx-injected": "sf" },
+		});
+
+		await expect(routeCall).rejects.toMatchObject({
+			code: "CAPABILITY_DECODE_FAILED",
+			method: "plugin.route.call",
+			message: "query must contain valid query keys.",
+		});
+		expect(dispatched).toEqual([]);
+	});
+
+	it("rejects outbound remote plugin route calls with unsafe query values", async () => {
+		// Records each runtime method the router tries to invoke; it must stay
+		// empty because the call is expected to be rejected before dispatch.
+		const dispatched: string[] = [];
+		const router = new RuntimeBrokerCapabilityRouter({
+			invokeRuntime: async (method) => {
+				dispatched.push(method);
+				return { status: 200 };
+			},
+		});
+
+		// The second array entry carries a CR/LF sequence inside a query value.
+		const routeCall = router.plugin.callRoute({
+			moduleId: "remote-weather",
+			method: "GET",
+			path: "/weather/sf",
+			query: { city: ["sf", "oakland\r\nx-injected: yes"] },
+		});
+
+		await expect(routeCall).rejects.toMatchObject({
+			code: "CAPABILITY_DECODE_FAILED",
+			method: "plugin.route.call",
+			message: "query must contain valid query values.",
+		});
+		expect(dispatched).toEqual([]);
+	});
+
+	it("rejects outbound remote plugin calls with unsafe endpoint ids", async () => {
+		// Records each runtime method the router tries to invoke; it must stay
+		// empty because the call is expected to be rejected before dispatch.
+		const dispatched: string[] = [];
+		const router = new RuntimeBrokerCapabilityRouter({
+			invokeRuntime: async (method) => {
+				dispatched.push(method);
+				return {};
+			},
+		});
+
+		// The explicit endpoint id carries a CR/LF sequence.
+		const actionCall = router.plugin.invokeAction({
+			endpointId: "primary\r\nsecondary",
+			moduleId: "remote-weather",
+			action: "WEATHER_LOOKUP",
+		});
+
+		await expect(actionCall).rejects.toMatchObject({
+			code: "CAPABILITY_DECODE_FAILED",
+			method: "capability.endpoint",
+			message: "endpointId must not contain control characters.",
+		});
+		expect(dispatched).toEqual([]);
+	});
+
 	it("rejects outbound remote plugin asset requests with unsafe paths", async () => {
 		const calls: string[] = [];
 		const router = new RuntimeBrokerCapabilityRouter({