Merge remote-tracking branch 'origin/develop' into develop

lalalune · lalalune · commit 9b892b875cba · 2026-05-15T03:25:13.000-07:00
diff --git a/packages/app-core/test/app/streaming-visible-text.live.e2e.test.ts b/packages/app-core/test/app/streaming-visible-text.live.e2e.test.ts
@@ -10,7 +10,7 @@
  */
 import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
 import { existsSync } from "node:fs";
-import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { mkdir, mkdtemp, readdir, rm, symlink, unlink } from "node:fs/promises";
 import {
   createServer,
   type IncomingMessage,
@@ -199,9 +199,37 @@ async function startHarnessServer(args: {
   return server;
 }
 
+// Best-effort helper: symlinks each entry of the nested packages/agent/src
+// output into the root of packages/agent/dist. Stale symlinks are replaced;
+// regular files and directories already at the link path are left untouched.
+async function ensureAgentDevDistLinks(): Promise<void> {
+  const distRoot = path.join(REPO_ROOT, "packages/agent/dist");
+  const nestedSrc = path.join(distRoot, "packages/agent/src");
+  const staleLinks = new Set<string>();
+  let entries: string[];
+  try {
+    entries = await readdir(nestedSrc);
+    for (const item of await readdir(distRoot, { withFileTypes: true })) {
+      if (item.isSymbolicLink()) staleLinks.add(item.name);
+    }
+  } catch {
+    return; // dist output is absent or unreadable, so there is nothing to link
+  }
+  await Promise.all(
+    entries.map(async (entry) => {
+      const link = path.join(distRoot, entry);
+      // unlink() also deletes regular files, so only clear scanned symlinks.
+      if (staleLinks.has(entry)) await unlink(link).catch(() => undefined);
+      // Best effort; startup will surface any unresolved import.
+      await symlink(path.join(nestedSrc, entry), link).catch(() => undefined);
+    }),
+  );
+}
+
 async function startStack(): Promise<Stack> {
   const stateRoot = path.join(REPO_ROOT, ".tmp");
   await mkdir(stateRoot, { recursive: true });
+  await ensureAgentDevDistLinks();
   const stateDir = await mkdtemp(path.join(stateRoot, "eliza-streaming-live-"));
   const apiPort = await getFreePort();
   const harnessPort = await getFreePort();
@@ -211,7 +239,10 @@ async function startStack(): Promise<Stack> {
     "node",
     [
       path.join(REPO_ROOT, "packages/app-core/scripts/run-node-tsx.mjs"),
-      path.join(REPO_ROOT, "packages/app-core/test/scripts/start-eliza-live.ts"),
+      path.join(
+        REPO_ROOT,
+        "packages/app-core/test/scripts/start-eliza-live.ts",
+      ),
     ],
     {
       cwd: REPO_ROOT,
diff --git a/plugins/plugin-local-inference/__tests__/voice-attribution-pipeline-wiring.test.ts b/plugins/plugin-local-inference/__tests__/voice-attribution-pipeline-wiring.test.ts
@@ -230,9 +230,8 @@ describe("EngineVoiceBridge — VoiceProfileStore attribution wiring (W3-1 item
 		if (!attributionCalled) {
 			// Encoder not available in test environment — expected.
 			// The bridge emits a console.warn with the turn id and error message.
-			expect(consoleWarnSpy).toHaveBeenCalledWith(
-				expect.stringContaining("[voice-bridge] speaker attribution failed"),
-				expect.anything(),
+			expect(consoleWarnSpy.mock.calls.flat().join(" ")).toContain(
+				"[voice-bridge] speaker attribution failed",
 			);
 		}
 
diff --git a/plugins/plugin-local-inference/src/services/backend.js b/plugins/plugin-local-inference/src/services/backend.js
diff --git a/plugins/plugin-local-inference/src/services/backend.test.ts b/plugins/plugin-local-inference/src/services/backend.test.ts
@@ -65,14 +65,14 @@ describe("gpuLayersForKvOffload", () => {
 });
 
 describe("decideBackend", () => {
-	it("defaults to node-llama-cpp for stock GGUFs", () => {
+	it("defaults to the custom llama-server when available", () => {
 		const decision = decideBackend({
 			override: "auto",
 			catalog: BASE_CATALOG,
 			llamaServerAvailable: true,
 			dflashRequired: false,
 		});
-		expect(decision.backend).toBe("node-llama-cpp");
+		expect(decision.backend).toBe("llama-server");
 		expect(decision.reason).toBe("default");
 	});
 
@@ -177,7 +177,7 @@ describe("decideBackend", () => {
 			llamaServerAvailable: true,
 			dflashRequired: false,
 		});
-		expect(decision.backend).toBe("node-llama-cpp");
+		expect(decision.backend).toBe("llama-server");
 		expect(decision.reason).toBe("default");
 	});
 });
@@ -219,7 +219,7 @@ class FakeBackend implements LocalInferenceBackend {
 }
 
 describe("BackendDispatcher", () => {
-	it("loads node-llama-cpp by default", async () => {
+	it("loads custom llama-server by default", async () => {
 		const node = new FakeBackend("node-llama-cpp");
 		const server = new FakeBackend("llama-server");
 		const d = new BackendDispatcher(
@@ -229,10 +229,10 @@ describe("BackendDispatcher", () => {
 			() => false,
 		);
 		await d.load({ modelPath: "/m.gguf", catalog: BASE_CATALOG });
-		expect(d.activeBackendId()).toBe("node-llama-cpp");
-		expect(node.loaded).toBe(true);
-		expect(server.loaded).toBe(false);
-		expect(await d.generate({ prompt: "hi" })).toBe("node-llama-cpp:reply");
+		expect(d.activeBackendId()).toBe("llama-server");
+		expect(node.loaded).toBe(false);
+		expect(server.loaded).toBe(true);
+		expect(await d.generate({ prompt: "hi" })).toBe("llama-server:reply");
 	});
 
 	it("switches backends when the decision differs and unloads the previous", async () => {
@@ -245,14 +245,14 @@ describe("BackendDispatcher", () => {
 			() => false,
 		);
 		await d.load({ modelPath: "/m.gguf", catalog: BASE_CATALOG });
-		expect(d.activeBackendId()).toBe("node-llama-cpp");
+		expect(d.activeBackendId()).toBe("llama-server");
 
 		const kernelCatalog = withRuntime(BASE_CATALOG, {
 			optimizations: { requiresKernel: ["dflash"] },
 		});
 		await d.load({ modelPath: "/m2.gguf", catalog: kernelCatalog });
 		expect(d.activeBackendId()).toBe("llama-server");
-		expect(node.unloads).toBe(1);
+		expect(node.unloads).toBe(0);
 		expect(server.loaded).toBe(true);
 	});
 
diff --git a/plugins/plugin-local-inference/src/services/backend.ts b/plugins/plugin-local-inference/src/services/backend.ts
@@ -22,7 +22,9 @@
  *      `llama-server` when this is set AND the binary is available;
  *      otherwise we fall back to `node-llama-cpp` unless DFlash is
  *      explicitly required (`ELIZA_DFLASH_REQUIRED=1`).
- *   4. Default: `node-llama-cpp` for stock GGUFs without runtime metadata.
+ *   4. Default: custom `llama-server` when the managed binary is available;
+ *      `node-llama-cpp` is only a last-resort compatibility path for hosts
+ *      without the custom llama.cpp runtime.
  *
  * The dispatcher does NOT own the spawn body — `llama-server` and the
  * node binding own that. It owns selection only, plus a small load-state
@@ -338,6 +340,14 @@ export function decideBackend(input: {
 			unsatisfiedKernels,
 		};
 	}
+	if (llamaServerAvailable) {
+		return {
+			backend: "llama-server",
+			reason: "default",
+			kernels,
+			unsatisfiedKernels,
+		};
+	}
 	return {
 		backend: "node-llama-cpp",
 		reason: "default",
diff --git a/plugins/plugin-local-inference/src/services/voice/interactive-session.e2e.test.ts b/plugins/plugin-local-inference/src/services/voice/interactive-session.e2e.test.ts
@@ -43,6 +43,7 @@ import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
 import { LocalInferenceEngine } from "../engine";
 import { voiceLatencyTracer } from "../latency-trace";
+import type { InstalledModel } from "../types";
 import { makeSpeechWithSilenceFixture } from "./__test-helpers__/synthetic-speech";
 import type { VoiceLifecycleLoaders } from "./lifecycle";
 import { PushMicSource } from "./mic-source";
@@ -206,23 +207,31 @@ afterEach(async () => {
 });
 
 // Is a real model + fused TTS + required kernels present? Conservative gate:
-// the catalog's required kernels are advertised by the installed llama-server
-// AND it is a fused build. Almost never true in CI — that's the point.
-const realBundleId = "eliza-1-2b";
+// the requested bundle is installed, the catalog's required kernels are
+// advertised by the installed llama-server, AND it is a fused build. Almost
+// never true in CI — that's the point.
+const realBundleId = process.env.ELIZA_VOICE_E2E_BUNDLE_ID ?? "eliza-1-2b";
+let realInstalledTarget: InstalledModel | null = null;
 let realBackendPresent = false;
 try {
 	const { findCatalogModel } = await import("@elizaos/shared");
 	const { getDflashRuntimeStatus } = await import("../dflash-server");
+	const { listInstalledModels } = await import("../registry");
 	const entry = findCatalogModel(realBundleId);
+	const installed = await listInstalledModels();
+	realInstalledTarget = installed.find((m) => m.id === realBundleId) ?? null;
 	const status = getDflashRuntimeStatus();
 	const required = entry?.runtime?.optimizations?.requiresKernel ?? [];
 	const advertised = status.capabilities?.kernels ?? null;
 	const kernelsOk =
 		required.length > 0 &&
 		advertised != null &&
 		required.every((k) => (advertised as Record<string, boolean>)[k] === true);
-	realBackendPresent = Boolean(kernelsOk && status.capabilities?.fused);
+	realBackendPresent = Boolean(
+		realInstalledTarget?.bundleRoot && kernelsOk && status.capabilities?.fused,
+	);
 } catch {
+	realInstalledTarget = null;
 	realBackendPresent = false;
 }
 
@@ -540,7 +549,7 @@ describe("interactive voice path — wiring (stub backends)", () => {
 // ── Real-output — gated ────────────────────────────────────────────────────
 
 describe.skipIf(!realBackendPresent)(
-	"interactive voice path — real eliza-1-2b + fused TTS",
+	`interactive voice path — real ${realBundleId} + fused TTS`,
 	() => {
 		it("runs one synthetic-speech turn end to end and produces real audio", async () => {
 			// Only reachable on a box with the bundle + fused build + required
@@ -549,12 +558,10 @@ describe.skipIf(!realBackendPresent)(
 			// loop via `engine.startVoiceSession`.
 			const { localInferenceEngine } = await import("../engine");
 			const eng = localInferenceEngine;
-			const { listInstalledModels } = await import("../registry");
-			const installed = await listInstalledModels();
-			const target = installed.find((m) => m.id === realBundleId);
+			const target = realInstalledTarget;
 			expect(target).toBeTruthy();
 			if (!target?.bundleRoot)
-				throw new Error("real eliza-1-2b bundle has no bundleRoot");
+				throw new Error(`real ${realBundleId} bundle has no bundleRoot`);
 			const targetBundleRoot = target.bundleRoot;
 			await eng.load(target.path);
 			eng.startVoice({ bundleRoot: targetBundleRoot, useFfiBackend: true });
diff --git a/plugins/plugin-local-inference/src/services/voice/voice-duet.e2e.test.ts b/plugins/plugin-local-inference/src/services/voice/voice-duet.e2e.test.ts
@@ -19,16 +19,22 @@
  */
 
 import { describe, expect, it } from "vitest";
+import type { InstalledModel } from "../types";
 
 const ASR_RATE = 16_000;
-const realBundleId = "eliza-1-0_8b";
+const realBundleId =
+	process.env.ELIZA_VOICE_DUET_E2E_BUNDLE_ID ?? "eliza-1-0_8b";
+let realInstalledTarget: InstalledModel | null = null;
 
 /** Probe — lazy import so collection stays cheap when nothing is present. */
 async function probeRealBackend(): Promise<boolean> {
 	try {
 		const { findCatalogModel } = await import("@elizaos/shared");
 		const { getDflashRuntimeStatus } = await import("../dflash-server");
+		const { listInstalledModels } = await import("../registry");
 		const entry = findCatalogModel(realBundleId);
+		const installed = await listInstalledModels();
+		realInstalledTarget = installed.find((m) => m.id === realBundleId) ?? null;
 		const status = getDflashRuntimeStatus();
 		const required = entry?.runtime?.optimizations?.requiresKernel ?? [];
 		const advertised = status.capabilities?.kernels ?? null;
@@ -38,30 +44,33 @@ async function probeRealBackend(): Promise<boolean> {
 			required.every(
 				(k) => (advertised as Record<string, boolean>)[k] === true,
 			);
-		return Boolean(kernelsOk && status.capabilities?.fused);
+		return Boolean(
+			realInstalledTarget?.bundleRoot &&
+				kernelsOk &&
+				status.capabilities?.fused,
+		);
 	} catch {
+		realInstalledTarget = null;
 		return false;
 	}
 }
 
 const realBackendPresent = await probeRealBackend();
 
 describe.skipIf(!realBackendPresent)(
-	"voice:duet — real eliza-1-0_8b + fused TTS",
+	`voice:duet — real ${realBundleId} + fused TTS`,
 	() => {
 		it("boots two engines on the same bundle, wires the duet bridge, and produces audio crossing the loop", async () => {
 			const { LocalInferenceEngine } = await import("../engine");
-			const { listInstalledModels } = await import("../registry");
 			const { DuetAudioBridge } = await import(
-				"../../../../scripts/lib/duet-bridge.mjs"
+				"../../../../../packages/app-core/scripts/lib/duet-bridge.mjs"
 			);
 			const { PushMicSource } = await import("./mic-source");
 
-			const installed = await listInstalledModels();
-			const target = installed.find((m) => m.id === realBundleId);
+			const target = realInstalledTarget;
 			expect(target).toBeTruthy();
 			if (!target?.bundleRoot) {
-				throw new Error("real eliza-1-0_8b bundle has no bundleRoot");
+				throw new Error(`real ${realBundleId} bundle has no bundleRoot`);
 			}
 			const bundleRoot = target.bundleRoot;
 			const pushA = new PushMicSource({ sampleRate: ASR_RATE });