elizaOS
diff --git a/packages/agent/docs/capability-router-remote-plugins.md b/packages/agent/docs/capability-router-remote-plugins.md
Lines changed: 43 additions & 12 deletions
diff --git a/packages/agent/package.json b/packages/agent/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/packages/agent/src/services/remote-capability-endpoint-conformance.test.ts b/packages/agent/src/services/remote-capability-endpoint-conformance.test.ts
Lines changed: 218 additions & 2 deletions
@@ -994,7 +994,11 @@ metadata, duplicate endpoint ids, duplicate provider reports, malformed endpoint
 ids, non-lowercase provider names, invalid Cloud API base URLs, Cloud API base
 URLs with query or fragment components, non-2xx route results, non-JavaScript
 view asset paths/content types, missing, malformed, or empty-content view asset
-SHA-256 digests, and credential-shaped field names or string values such as tokens,
+SHA-256 digests, missing model results, failed lifecycle calls, unhandled event
+calls, asset integrity values that do not match the recorded asset digest,
+empty action/provider/evaluator/response-handler outputs, missing
+service/app-bridge results, and credential-shaped field names or string values
+such as tokens,
 authorization headers, API keys, passwords, secrets, bearer/basic auth values,
 and URLs with embedded credentials anywhere in the artifact. Every exercised RPC
 target must also start with one of the module ids observed in the live manifest,
@@ -1004,6 +1008,16 @@ target recorded in `conformance.moduleExercises`. The conformance harness keeps
 the required surface summary in `conformance.exercised`, then performs
 additional cheap RPC calls for untouched modules so multi-module endpoints still
 produce per-module exercise evidence without overwriting the summary target.
+The harness fails at observation time when action, provider, evaluator,
+response-handler evaluator, response-handler field evaluator, service, or
+app-bridge calls return empty success-shaped payloads, and when lifecycle or
+event calls do not report success.
+When a view asset includes subresource integrity metadata, the harness verifies
+that value against the fetched bundle bytes before recording the observation.
+The live report writer rejects unknown report kinds before writing, only accepts
+lowercase report names that may contain numbers or hyphens, enforces
+`cloud.json` for Cloud and `<provider>.json` for provider reports, and writes
+with exclusive create so a second artifact cannot overwrite the first
+observation.
 `sync.registered` and `sync.registeredModules` must not contain duplicate
 materialized plugin/module identities, and every registered module must have a
 unique trusted `sync.trustDecisions` entry, so full-surface evidence is tied
@@ -1132,11 +1146,13 @@ packages/agent/src/services/remote-capability-endpoint-conformance.test.ts
   building a temporary remote view bundle, starting the reference endpoint,
   running CLI conformance against it with bearer auth, importing the returned
   bundle as JavaScript, and tearing it down.
-- `bun run --cwd packages/agent test:remote-capabilities` passed with 158
-  tests passing and 1 skipped after adding registered-remote component
-  ownership checks, cross-module/local model collision checks, and stale
-  contribution cleanup coverage for disappearing remote modules, plus runtime
-  app route-module collision protection for remote app bridges.
+- `bun run --cwd packages/agent test:remote-capabilities` passed with 188
+  tests passing and 3 skipped. The canonical suite covers registered-remote
+  component ownership checks, cross-module/local model collision checks, stale
+  contribution cleanup coverage for disappearing remote modules, runtime app
+  route-module collision protection for remote app bridges, and live report
+  writer safety for report names, identity, duplicate artifacts, and weak
+  conformance result rejection.
 - `bun run --cwd packages/agent test:remote-capabilities:source-build` passed
   with 2 focused tests passing and 35 adapter tests skipped by name filter.
 - `bun run --cwd packages/agent test:remote-capabilities:provider-live` found
@@ -1207,10 +1223,13 @@ packages/agent/src/services/remote-capability-cloud-sandbox.cloud-smoke.test.ts
   strict scheduled/manual observation, and that the final `test-status` gate
   treats scheduled runs as strict, with required provider endpoints, strict
   live report validation, required artifact upload, and matching live report
-  directories between smoke producers, validators, and uploaded artifacts.
+  directories between smoke producers, validators, and uploaded artifacts. It
+  also audits the package-level `test:remote-capabilities` script so live report
+  writer safety remains in the canonical remote-capability suite.
 - `bun run test:remote-capabilities:live-ci-audit:self-test` mutates those
-  report-directory env vars, artifact upload paths, final `test-status` live
-  job gating, scheduled/manual live observation gates, Cloud
+  report-directory env vars, artifact upload paths, package-level remote
+  capability suite membership, final `test-status` live job gating,
+  scheduled/manual live observation gates, Cloud
   freshness/identity validation flags, provider primary endpoint secret
   enforcement, provider allowed/required lists, and provider GitHub-env
   matching, and proves the live-CI audit fails when smoke output no longer
@@ -1223,18 +1242,30 @@ packages/agent/src/services/remote-capability-cloud-sandbox.cloud-smoke.test.ts
   configured transport URL. The fingerprint helper also strips query/fragment
   components and rejects embedded URL credentials before hashing, matching the
   URL-backed endpoint provider's accepted base URL shape.
+- Live report writers only accept lowercase report names that may contain
+  numbers or hyphens, require Cloud reports to be named `cloud`, require
+  provider reports to be named after their provider, and create report files
+  with exclusive writes, so a duplicate Cloud or provider report cannot
+  silently overwrite an earlier artifact before validation/upload.
 - Conformance reports include an `rpcCalls` ledger that records every canonical
   protocol method used for each exercised surface and module. The live report
   validator requires this ledger to cover every `moduleExercises` entry, every
   summarized required surface, and every evaluator phase (`shouldRun`,
   `prepare`, `prompt`, `process`, response-handler `evaluate`, and field
   `parse`/`handle`), so live evidence proves the endpoint was exercised through
   the standard RPC-like protocol, not only materialized in a manifest.
+- Model, lifecycle, event, service, and app-bridge conformance results must
+  carry their required protocol success fields: `modelResult.result`,
+  `lifecycleResult.ok: true`, `eventResult.handled: true`,
+  `serviceResult.result`, and `appBridgeResult.result`.
 - View-asset conformance now preserves manifest-declared asset metadata and
   rejects fetched bundles whose content type or integrity value contradicts the
-  manifest. The live report validator also rejects artifacts whose recorded
-  manifest asset metadata disagrees with the fetched asset metadata or whose
-  fetched JavaScript bundle digest is the empty SHA-256 digest.
+  manifest, whose integrity value does not include a SHA-256 token, or whose
+  integrity value does not match the fetched bytes. The live report validator
+  also rejects artifacts whose recorded manifest asset metadata disagrees with
+  the fetched asset metadata, whose integrity value lacks or does not match the
+  recorded SHA-256 digest, or whose fetched JavaScript bundle digest is the empty
+  SHA-256 digest.
 - Runtime live summaries include `runtime.remotePlugins`, keyed by plugin name,
   endpoint id, and module id. The validator requires this runtime identity list
   to match `sync.registeredModules` exactly, so count totals cannot stand in for
 
@@ -46,7 +46,7 @@
     "format:fix": "bunx @biomejs/biome format --write src",
     "pack:dry-run": "cd dist && npm pack --dry-run",
     "test": "vitest run --config vitest.config.ts",
-    "test:remote-capabilities": "cd ../.. && bunx vitest run packages/agent/src/services/remote-plugin-adapter.test.ts packages/agent/src/services/remote-capability-router.test.ts packages/agent/src/services/remote-capability-endpoint-provider.test.ts packages/agent/src/services/remote-capability-endpoint-conformance.test.ts packages/agent/src/services/remote-capability-cloud-sandbox.test.ts packages/agent/src/api/remote-capability-routes.test.ts packages/agent/src/__tests__/views-registry-integration.test.ts packages/core/src/capabilities/index.test.ts --coverage.enabled=false",
+    "test:remote-capabilities": "cd ../.. && bunx vitest run packages/agent/src/services/remote-plugin-adapter.test.ts packages/agent/src/services/remote-capability-router.test.ts packages/agent/src/services/remote-capability-endpoint-provider.test.ts packages/agent/src/services/remote-capability-endpoint-conformance.test.ts packages/agent/src/services/remote-capability-cloud-sandbox.test.ts packages/agent/src/services/remote-capability-live-report.test.ts packages/agent/src/api/remote-capability-routes.test.ts packages/agent/src/__tests__/views-registry-integration.test.ts packages/core/src/capabilities/index.test.ts --coverage.enabled=false",
     "test:remote-capabilities:source-build": "cd ../.. && bunx vitest run packages/agent/src/services/remote-plugin-adapter.test.ts -t \"builds a remote plugin from source|loads a built remote plugin from a separate capability server process\" --coverage.enabled=false",
     "test:remote-capabilities:docker": "cd ../.. && ELIZA_REMOTE_CAPABILITY_DOCKER_SMOKE=1 bunx vitest run packages/agent/src/services/remote-plugin-adapter.test.ts -t \"Docker container capability server\" --coverage.enabled=false",
     "test:remote-capabilities:cloud-live": "cd ../.. && ELIZA_REMOTE_CAPABILITY_CLOUD_LIVE=1 bunx vitest run packages/agent/src/services/remote-capability-cloud-sandbox.cloud-smoke.test.ts --coverage.enabled=false",
 
@@ -489,6 +489,108 @@ describe("remote capability endpoint conformance", () => {
     );
   });
 
+  it.each([ // Rows: [stubbed RPC method, required surface, stubbed result payload, expected rejection message].
+    [
+      "plugin.action.invoke",
+      "action",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty action result.',
+    ],
+    [
+      "plugin.provider.get",
+      "provider",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty provider result.',
+    ],
+    [
+      "plugin.model.invoke",
+      "model",
+      {},
+      "result is required.", // Unlike the other rows, this expected message does not name the endpoint.
+    ],
+    [
+      "plugin.lifecycle.call",
+      "lifecycle",
+      { ok: false },
+      'Capability endpoint "remote-endpoint" returned a failed lifecycle result.',
+    ],
+    [
+      "plugin.event.handle",
+      "event",
+      { handled: false },
+      'Capability endpoint "remote-endpoint" returned an unhandled event result.',
+    ],
+    [
+      "plugin.service.call",
+      "service",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty service result.',
+    ],
+    [
+      "plugin.appBridge.call",
+      "appBridge",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty app bridge result.',
+    ],
+  ] as const)(
+    "fails when %s returns weak conformance evidence",
+    async (method, surface, result, message) => {
+      installMinimalFixtureFetch({ [method]: result }); // Override only this method's fixture result with the weak payload.
+
+      await expect(
+        assertRemoteCapabilityEndpointConformance({
+          endpoint: {
+            id: "remote-endpoint",
+            baseUrl: "https://remote.example.test",
+          },
+          requiredSurfaces: [surface],
+        }),
+      ).rejects.toThrow(message); // The conformance assertion must reject with the row's expected message.
+    },
+  );
+
+  it.each([ // Rows: [stubbed staged RPC method, required surface, stubbed result payload, expected rejection message].
+    [
+      "plugin.evaluator.process",
+      "evaluator",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty evaluator process result.',
+    ],
+    [
+      "plugin.responseHandlerEvaluator.evaluate",
+      "responseHandlerEvaluator",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty response-handler evaluator result.',
+    ],
+    [
+      "plugin.responseHandlerFieldEvaluator.parse",
+      "responseHandlerFieldEvaluator",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty response-handler field evaluator parse result.',
+    ],
+    [
+      "plugin.responseHandlerFieldEvaluator.handle", // Same surface as the parse row; only the stubbed stage differs.
+      "responseHandlerFieldEvaluator",
+      {},
+      'Capability endpoint "remote-endpoint" returned an empty response-handler field evaluator handle result.',
+    ],
+  ] as const)(
+    "fails when %s returns weak staged conformance evidence",
+    async (method, surface, result, message) => {
+      installMinimalFixtureFetch({ [method]: result }); // Override only this stage's fixture result with the empty payload.
+
+      await expect(
+        assertRemoteCapabilityEndpointConformance({
+          endpoint: {
+            id: "remote-endpoint",
+            baseUrl: "https://remote.example.test",
+          },
+          requiredSurfaces: [surface],
+        }),
+      ).rejects.toThrow(message); // The conformance assertion must reject with the row's expected message.
+    },
+  );
+
   it("fails when remote view conformance returns a non-JavaScript asset", async () => {
     installMinimalFixtureFetch({
       "plugin.asset.get": {
@@ -567,6 +669,83 @@ describe("remote capability endpoint conformance", () => {
       'Capability endpoint "remote-endpoint" returned a view asset integrity value that does not match its manifest.',
     );
   });
+
+  it("fails when remote view asset integrity does not match the returned bytes", async () => { // Asset result and manifest view carry the same integrity string, yet the run must still reject.
+    installMinimalFixtureFetch(
+      {
+        "plugin.asset.get": {
+          ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.asset,
+          integrity: "sha256-deadbeef", // Placeholder value the test expects to be rejected against the asset bytes.
+        },
+      },
+      {
+        modules: [
+          {
+            ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.module,
+            views: [
+              {
+                ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.module.views[0],
+                integrity: "sha256-deadbeef", // Same string as the asset result above.
+              },
+            ],
+          },
+        ],
+      },
+    );
+
+    await expect(
+      assertRemoteCapabilityEndpointConformance({
+        endpoint: {
+          id: "remote-endpoint",
+          baseUrl: "https://remote.example.test",
+        },
+        requiredSurfaces: ["viewAsset"],
+      }),
+    ).rejects.toThrow(
+      'Capability endpoint "remote-endpoint" returned a view asset integrity value that does not match its bytes.',
+    );
+  });
+
+  it("fails when remote view asset integrity lacks a sha256 token", async () => { // Uses a SHA-384-only integrity value on both the asset result and the manifest view.
+    const assetBytes = Buffer.from(
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.asset.bodyBase64,
+      "base64",
+    );
+    const integrity = `sha384-${createHash("sha384").update(assetBytes).digest("base64")}`; // SHA-384 digest computed from the fixture bytes; contains no sha256 token.
+    installMinimalFixtureFetch(
+      {
+        "plugin.asset.get": {
+          ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.asset,
+          integrity,
+        },
+      },
+      {
+        modules: [
+          {
+            ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.module,
+            views: [
+              {
+                ...CAPABILITY_ROUTER_PROTOCOL_FIXTURE.module.views[0],
+                integrity, // Same value as the asset result above.
+              },
+            ],
+          },
+        ],
+      },
+    );
+
+    await expect(
+      assertRemoteCapabilityEndpointConformance({
+        endpoint: {
+          id: "remote-endpoint",
+          baseUrl: "https://remote.example.test",
+        },
+        requiredSurfaces: ["viewAsset"],
+      }),
+    ).rejects.toThrow(
+      'Capability endpoint "remote-endpoint" returned a view asset integrity value without a sha256 digest.',
+    );
+  });
 });
 
 function installMinimalFixtureFetch(
@@ -575,6 +754,43 @@ function installMinimalFixtureFetch(
     modules?: unknown[];
   } = {},
 ): void {
+  const results: Record<string, unknown> = {
+    "plugin.action.invoke": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.action,
+    "plugin.provider.get": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.provider,
+    "plugin.route.call": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.route,
+    "plugin.asset.get": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.asset,
+    "plugin.model.invoke": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.model,
+    "plugin.lifecycle.call":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.lifecycle,
+    "plugin.event.handle": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.event,
+    "plugin.service.call": CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.service,
+    "plugin.appBridge.call":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.appBridge,
+    "plugin.evaluator.shouldRun":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.evaluatorShouldRun,
+    "plugin.evaluator.prepare":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.evaluatorPrepare,
+    "plugin.evaluator.prompt":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.evaluatorPrompt,
+    "plugin.evaluator.process":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results.evaluatorProcess,
+    "plugin.responseHandlerEvaluator.shouldRun":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results
+        .responseHandlerEvaluatorShouldRun,
+    "plugin.responseHandlerEvaluator.evaluate":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results
+        .responseHandlerEvaluatorEvaluate,
+    "plugin.responseHandlerFieldEvaluator.shouldRun":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results
+        .responseHandlerFieldEvaluatorShouldRun,
+    "plugin.responseHandlerFieldEvaluator.parse":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results
+        .responseHandlerFieldEvaluatorParse,
+    "plugin.responseHandlerFieldEvaluator.handle":
+      CAPABILITY_ROUTER_PROTOCOL_FIXTURE.results
+        .responseHandlerFieldEvaluatorHandle,
+    ...resultsByMethod,
+  };
   globalThis.fetch = vi.fn(async (url: string | URL, init?: RequestInit) => {
     const body = init?.body
       ? (JSON.parse(String(init.body)) as { method?: string })
@@ -600,8 +816,8 @@ function installMinimalFixtureFetch(
         },
       });
     }
-    if (body?.method && body.method in resultsByMethod) {
-      return jsonResponse({ ok: true, result: resultsByMethod[body.method] });
+    if (body?.method && body.method in results) {
+      return jsonResponse({ ok: true, result: results[body.method] });
     }
     return jsonResponse({ ok: false, error: { message: "unexpected" } }, 404);
   }) as unknown as typeof fetch;