Skip to content

Commit 9b892b8

Browse files
committed
Merge remote-tracking branch 'origin/develop' into develop
2 parents bd4cbe0 + 2c77f88 commit 9b892b8

7 files changed

Lines changed: 97 additions & 33 deletions

File tree

packages/app-core/test/app/streaming-visible-text.live.e2e.test.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*/
1111
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
1212
import { existsSync } from "node:fs";
13-
import { mkdir, mkdtemp, rm } from "node:fs/promises";
13+
import { mkdir, mkdtemp, readdir, rm, symlink, unlink } from "node:fs/promises";
1414
import {
1515
createServer,
1616
type IncomingMessage,
@@ -199,9 +199,37 @@ async function startHarnessServer(args: {
199199
return server;
200200
}
201201

202+
/**
 * Mirrors every entry of `packages/agent/dist/packages/agent/src` as a
 * symlink placed directly under `packages/agent/dist`.
 *
 * Best effort: when the nested source directory cannot be read the function
 * returns without doing anything, and individual link failures are ignored.
 *
 * Only an existing *symlink* at the destination is replaced. `unlink()`
 * removes regular files as well, so calling it unconditionally would delete
 * a real build artifact that happens to share a name with a nested entry.
 */
async function ensureAgentDevDistLinks(): Promise<void> {
  // Function-scope import keeps this block self-contained; `lstat` inspects
  // the destination itself rather than whatever it points at.
  const { lstat } = await import("node:fs/promises");

  const distRoot = path.join(REPO_ROOT, "packages/agent/dist");
  const nestedSrc = path.join(distRoot, "packages/agent/src");

  let entries: string[];
  try {
    entries = await readdir(nestedSrc);
  } catch {
    // Nested output is missing or unreadable: nothing to link.
    return;
  }

  await Promise.all(
    entries.map(async (entry) => {
      const target = path.join(nestedSrc, entry);
      const link = path.join(distRoot, entry);

      try {
        // Drop a stale link so it can be re-pointed; leave real files and
        // directories untouched.
        const stats = await lstat(link);
        if (stats.isSymbolicLink()) {
          await unlink(link);
        }
      } catch {
        /* absent, or could not be inspected/removed; keep going */
      }

      try {
        await symlink(target, link);
      } catch {
        /* best effort; startup will surface any unresolved import */
      }
    }),
  );
}
228+
202229
async function startStack(): Promise<Stack> {
203230
const stateRoot = path.join(REPO_ROOT, ".tmp");
204231
await mkdir(stateRoot, { recursive: true });
232+
await ensureAgentDevDistLinks();
205233
const stateDir = await mkdtemp(path.join(stateRoot, "eliza-streaming-live-"));
206234
const apiPort = await getFreePort();
207235
const harnessPort = await getFreePort();
@@ -211,7 +239,10 @@ async function startStack(): Promise<Stack> {
211239
"node",
212240
[
213241
path.join(REPO_ROOT, "packages/app-core/scripts/run-node-tsx.mjs"),
214-
path.join(REPO_ROOT, "packages/app-core/test/scripts/start-eliza-live.ts"),
242+
path.join(
243+
REPO_ROOT,
244+
"packages/app-core/test/scripts/start-eliza-live.ts",
245+
),
215246
],
216247
{
217248
cwd: REPO_ROOT,

plugins/plugin-local-inference/__tests__/voice-attribution-pipeline-wiring.test.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,8 @@ describe("EngineVoiceBridge — VoiceProfileStore attribution wiring (W3-1 item
230230
if (!attributionCalled) {
231231
// Encoder not available in test environment — expected.
232232
// The bridge emits a console.warn with the turn id and error message.
233-
expect(consoleWarnSpy).toHaveBeenCalledWith(
234-
expect.stringContaining("[voice-bridge] speaker attribution failed"),
235-
expect.anything(),
233+
expect(consoleWarnSpy.mock.calls.flat().join(" ")).toContain(
234+
"[voice-bridge] speaker attribution failed",
236235
);
237236
}
238237

plugins/plugin-local-inference/src/services/backend.js

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins/plugin-local-inference/src/services/backend.test.ts

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,14 @@ describe("gpuLayersForKvOffload", () => {
6565
});
6666

6767
describe("decideBackend", () => {
68-
it("defaults to node-llama-cpp for stock GGUFs", () => {
68+
it("defaults to the custom llama-server when available", () => {
6969
const decision = decideBackend({
7070
override: "auto",
7171
catalog: BASE_CATALOG,
7272
llamaServerAvailable: true,
7373
dflashRequired: false,
7474
});
75-
expect(decision.backend).toBe("node-llama-cpp");
75+
expect(decision.backend).toBe("llama-server");
7676
expect(decision.reason).toBe("default");
7777
});
7878

@@ -177,7 +177,7 @@ describe("decideBackend", () => {
177177
llamaServerAvailable: true,
178178
dflashRequired: false,
179179
});
180-
expect(decision.backend).toBe("node-llama-cpp");
180+
expect(decision.backend).toBe("llama-server");
181181
expect(decision.reason).toBe("default");
182182
});
183183
});
@@ -219,7 +219,7 @@ class FakeBackend implements LocalInferenceBackend {
219219
}
220220

221221
describe("BackendDispatcher", () => {
222-
it("loads node-llama-cpp by default", async () => {
222+
it("loads custom llama-server by default", async () => {
223223
const node = new FakeBackend("node-llama-cpp");
224224
const server = new FakeBackend("llama-server");
225225
const d = new BackendDispatcher(
@@ -229,10 +229,10 @@ describe("BackendDispatcher", () => {
229229
() => false,
230230
);
231231
await d.load({ modelPath: "/m.gguf", catalog: BASE_CATALOG });
232-
expect(d.activeBackendId()).toBe("node-llama-cpp");
233-
expect(node.loaded).toBe(true);
234-
expect(server.loaded).toBe(false);
235-
expect(await d.generate({ prompt: "hi" })).toBe("node-llama-cpp:reply");
232+
expect(d.activeBackendId()).toBe("llama-server");
233+
expect(node.loaded).toBe(false);
234+
expect(server.loaded).toBe(true);
235+
expect(await d.generate({ prompt: "hi" })).toBe("llama-server:reply");
236236
});
237237

238238
it("switches backends when the decision differs and unloads the previous", async () => {
@@ -245,14 +245,14 @@ describe("BackendDispatcher", () => {
245245
() => false,
246246
);
247247
await d.load({ modelPath: "/m.gguf", catalog: BASE_CATALOG });
248-
expect(d.activeBackendId()).toBe("node-llama-cpp");
248+
expect(d.activeBackendId()).toBe("llama-server");
249249

250250
const kernelCatalog = withRuntime(BASE_CATALOG, {
251251
optimizations: { requiresKernel: ["dflash"] },
252252
});
253253
await d.load({ modelPath: "/m2.gguf", catalog: kernelCatalog });
254254
expect(d.activeBackendId()).toBe("llama-server");
255-
expect(node.unloads).toBe(1);
255+
expect(node.unloads).toBe(0);
256256
expect(server.loaded).toBe(true);
257257
});
258258

plugins/plugin-local-inference/src/services/backend.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
* `llama-server` when this is set AND the binary is available;
2323
* otherwise we fall back to `node-llama-cpp` unless DFlash is
2424
* explicitly required (`ELIZA_DFLASH_REQUIRED=1`).
25-
* 4. Default: `node-llama-cpp` for stock GGUFs without runtime metadata.
25+
* 4. Default: custom `llama-server` when the managed binary is available;
26+
* `node-llama-cpp` is only a last-resort compatibility path for hosts
27+
* without the custom llama.cpp runtime.
2628
*
2729
* The dispatcher does NOT own the spawn body — `llama-server` and the
2830
* node binding own that. It owns selection only, plus a small load-state
@@ -338,6 +340,14 @@ export function decideBackend(input: {
338340
unsatisfiedKernels,
339341
};
340342
}
343+
if (llamaServerAvailable) {
344+
return {
345+
backend: "llama-server",
346+
reason: "default",
347+
kernels,
348+
unsatisfiedKernels,
349+
};
350+
}
341351
return {
342352
backend: "node-llama-cpp",
343353
reason: "default",

plugins/plugin-local-inference/src/services/voice/interactive-session.e2e.test.ts

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import path from "node:path";
4343
import { afterEach, beforeEach, describe, expect, it } from "vitest";
4444
import { LocalInferenceEngine } from "../engine";
4545
import { voiceLatencyTracer } from "../latency-trace";
46+
import type { InstalledModel } from "../types";
4647
import { makeSpeechWithSilenceFixture } from "./__test-helpers__/synthetic-speech";
4748
import type { VoiceLifecycleLoaders } from "./lifecycle";
4849
import { PushMicSource } from "./mic-source";
@@ -206,23 +207,31 @@ afterEach(async () => {
206207
});
207208

208209
// Is a real model + fused TTS + required kernels present? Conservative gate:
209-
// the catalog's required kernels are advertised by the installed llama-server
210-
// AND it is a fused build. Almost never true in CI — that's the point.
211-
const realBundleId = "eliza-1-2b";
210+
// the requested bundle is installed, the catalog's required kernels are
211+
// advertised by the installed llama-server, AND it is a fused build. Almost
212+
// never true in CI — that's the point.
213+
const realBundleId = process.env.ELIZA_VOICE_E2E_BUNDLE_ID ?? "eliza-1-2b";
214+
let realInstalledTarget: InstalledModel | null = null;
212215
let realBackendPresent = false;
213216
try {
214217
const { findCatalogModel } = await import("@elizaos/shared");
215218
const { getDflashRuntimeStatus } = await import("../dflash-server");
219+
const { listInstalledModels } = await import("../registry");
216220
const entry = findCatalogModel(realBundleId);
221+
const installed = await listInstalledModels();
222+
realInstalledTarget = installed.find((m) => m.id === realBundleId) ?? null;
217223
const status = getDflashRuntimeStatus();
218224
const required = entry?.runtime?.optimizations?.requiresKernel ?? [];
219225
const advertised = status.capabilities?.kernels ?? null;
220226
const kernelsOk =
221227
required.length > 0 &&
222228
advertised != null &&
223229
required.every((k) => (advertised as Record<string, boolean>)[k] === true);
224-
realBackendPresent = Boolean(kernelsOk && status.capabilities?.fused);
230+
realBackendPresent = Boolean(
231+
realInstalledTarget?.bundleRoot && kernelsOk && status.capabilities?.fused,
232+
);
225233
} catch {
234+
realInstalledTarget = null;
226235
realBackendPresent = false;
227236
}
228237

@@ -540,7 +549,7 @@ describe("interactive voice path — wiring (stub backends)", () => {
540549
// ── Real-output — gated ────────────────────────────────────────────────────
541550

542551
describe.skipIf(!realBackendPresent)(
543-
"interactive voice path — real eliza-1-2b + fused TTS",
552+
`interactive voice path — real ${realBundleId} + fused TTS`,
544553
() => {
545554
it("runs one synthetic-speech turn end to end and produces real audio", async () => {
546555
// Only reachable on a box with the bundle + fused build + required
@@ -549,12 +558,10 @@ describe.skipIf(!realBackendPresent)(
549558
// loop via `engine.startVoiceSession`.
550559
const { localInferenceEngine } = await import("../engine");
551560
const eng = localInferenceEngine;
552-
const { listInstalledModels } = await import("../registry");
553-
const installed = await listInstalledModels();
554-
const target = installed.find((m) => m.id === realBundleId);
561+
const target = realInstalledTarget;
555562
expect(target).toBeTruthy();
556563
if (!target?.bundleRoot)
557-
throw new Error("real eliza-1-2b bundle has no bundleRoot");
564+
throw new Error(`real ${realBundleId} bundle has no bundleRoot`);
558565
const targetBundleRoot = target.bundleRoot;
559566
await eng.load(target.path);
560567
eng.startVoice({ bundleRoot: targetBundleRoot, useFfiBackend: true });

plugins/plugin-local-inference/src/services/voice/voice-duet.e2e.test.ts

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,22 @@
1919
*/
2020

2121
import { describe, expect, it } from "vitest";
22+
import type { InstalledModel } from "../types";
2223

2324
const ASR_RATE = 16_000;
24-
const realBundleId = "eliza-1-0_8b";
25+
const realBundleId =
26+
process.env.ELIZA_VOICE_DUET_E2E_BUNDLE_ID ?? "eliza-1-0_8b";
27+
let realInstalledTarget: InstalledModel | null = null;
2528

2629
/** Probe — lazy import so collection stays cheap when nothing is present. */
2730
async function probeRealBackend(): Promise<boolean> {
2831
try {
2932
const { findCatalogModel } = await import("@elizaos/shared");
3033
const { getDflashRuntimeStatus } = await import("../dflash-server");
34+
const { listInstalledModels } = await import("../registry");
3135
const entry = findCatalogModel(realBundleId);
36+
const installed = await listInstalledModels();
37+
realInstalledTarget = installed.find((m) => m.id === realBundleId) ?? null;
3238
const status = getDflashRuntimeStatus();
3339
const required = entry?.runtime?.optimizations?.requiresKernel ?? [];
3440
const advertised = status.capabilities?.kernels ?? null;
@@ -38,30 +44,33 @@ async function probeRealBackend(): Promise<boolean> {
3844
required.every(
3945
(k) => (advertised as Record<string, boolean>)[k] === true,
4046
);
41-
return Boolean(kernelsOk && status.capabilities?.fused);
47+
return Boolean(
48+
realInstalledTarget?.bundleRoot &&
49+
kernelsOk &&
50+
status.capabilities?.fused,
51+
);
4252
} catch {
53+
realInstalledTarget = null;
4354
return false;
4455
}
4556
}
4657

4758
const realBackendPresent = await probeRealBackend();
4859

4960
describe.skipIf(!realBackendPresent)(
50-
"voice:duet — real eliza-1-0_8b + fused TTS",
61+
`voice:duet — real ${realBundleId} + fused TTS`,
5162
() => {
5263
it("boots two engines on the same bundle, wires the duet bridge, and produces audio crossing the loop", async () => {
5364
const { LocalInferenceEngine } = await import("../engine");
54-
const { listInstalledModels } = await import("../registry");
5565
const { DuetAudioBridge } = await import(
56-
"../../../../scripts/lib/duet-bridge.mjs"
66+
"../../../../../packages/app-core/scripts/lib/duet-bridge.mjs"
5767
);
5868
const { PushMicSource } = await import("./mic-source");
5969

60-
const installed = await listInstalledModels();
61-
const target = installed.find((m) => m.id === realBundleId);
70+
const target = realInstalledTarget;
6271
expect(target).toBeTruthy();
6372
if (!target?.bundleRoot) {
64-
throw new Error("real eliza-1-0_8b bundle has no bundleRoot");
73+
throw new Error(`real ${realBundleId} bundle has no bundleRoot`);
6574
}
6675
const bundleRoot = target.bundleRoot;
6776
const pushA = new PushMicSource({ sampleRate: ASR_RATE });

0 commit comments

Comments
 (0)