Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .cursor/skills/qv-sdk-pr-create/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ TICKET prefix[tags]: subject

## PR Body
```markdown
**Note**: be concise and prefer bullet points.

## 🎯 What problem does this PR solve?
...
```
Expand Down Expand Up @@ -174,6 +172,7 @@ Before outputting the PR description, verify:
- [ ] `[api]` tag has usage example
- [ ] `[mod]` tag has Added/Removed models list
- [ ] Description is concise - bullet points, no fluff
- [ ] Generated helper notes, template instructions, and tool footers are removed from the PR body
- [ ] If diff touches `packages/sdk/package.json` deps/version, the sync skill ran (or `--no-sync` was set with a reminder emitted), and `check:deps-vs-sdk` passes
- [ ] If base is `release-<pkg>-<x.y.z>`, the dual-PR flow ran (or `--no-backmerge` was set), and both PR URLs are reported

Expand Down
59 changes: 54 additions & 5 deletions packages/sdk/client/rpc/node-rpc-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
import type { RuntimeContext } from "@/schemas";

// Milliseconds to wait for the worker to establish IPC before RPC
// initialization is rejected with a timeout error.
const RPC_INIT_TIMEOUT_MS = 30_000;
// Maximum number of trailing worker-stderr characters retained for inclusion
// in startup error messages.
const WORKER_STDERR_TAIL_CHARS = 16_384;

const logger = getClientLogger();

Expand Down Expand Up @@ -175,6 +176,19 @@ function bestEffortUnlinkSocket(socketPath: string | null) {
}
}

/**
 * Appends a freshly received stderr chunk to the retained tail.
 *
 * The result never exceeds WORKER_STDERR_TAIL_CHARS characters (string
 * length units); when the combined text is longer, the oldest characters
 * are dropped so only the most recent output is kept.
 *
 * @param current - Tail accumulated so far.
 * @param chunk - Newly received stderr text.
 * @returns The updated, length-capped tail.
 */
function appendWorkerStderrTail(current: string, chunk: string) {
  const combined = current + chunk;
  const overflow = combined.length - WORKER_STDERR_TAIL_CHARS;
  return overflow > 0 ? combined.slice(overflow) : combined;
}

/**
 * Builds the Error describing a worker startup failure.
 *
 * Trailing whitespace is stripped from the captured stderr. If anything
 * remains, it is appended to the message under a "Worker stderr:" heading;
 * otherwise the message is just `details`.
 *
 * @param details - Human-readable description of what went wrong.
 * @param stderrTail - Captured tail of the worker's stderr (may be empty).
 * @returns An Error whose message carries the details and, if present, the stderr tail.
 */
function createWorkerStartupError(details: string, stderrTail: string) {
  const trimmedTail = stderrTail.trimEnd();
  const message =
    trimmedTail.length > 0
      ? `${details}\n\nWorker stderr:\n${trimmedTail}`
      : details;

  return new Error(message);
}

function resetModuleState() {
rpcInstance = null;
rpcPromise = null;
Expand Down Expand Up @@ -230,6 +244,14 @@ interface SpawnResources {
socketPath: string;
}

/**
 * Minimal stream surface this module relies on for a worker's stderr:
 * only the ability to subscribe to "data" events.
 */
interface WorkerStderrStream {
// Registers a listener for stderr output; chunks may be a Buffer or a string.
on(event: "data", listener: (chunk: Buffer | string) => void): void;
}

/**
 * Reads the optional `stderr` stream off a spawned worker process.
 *
 * The process is viewed through a structural cast that exposes an optional
 * `stderr` property; a missing or null value is normalized to `null`.
 *
 * @param proc - The spawned worker process.
 * @returns The worker's stderr stream, or `null` when none is available.
 */
function getWorkerStderr(proc: BareChildProcess): WorkerStderrStream | null {
  const { stderr } = proc as { stderr?: WorkerStderrStream | null };
  return stderr ?? null;
}

// `bare-runtime` resolves its platform binary with
// `require('bare-runtime-<platform>-<arch>')` and throws a terse
// `No binaries found for target '<platform>-<arch>'` whenever that package —
Expand Down Expand Up @@ -322,12 +344,19 @@ async function ensureRPC(): Promise<RPC> {

rpcPromise = new Promise((resolve, reject) => {
let settled = false;
let workerStderrTail = "";

const timer = setTimeout(() => {
if (settled) return;
settled = true;
const cause = workerStderrTail
? createWorkerStartupError(
"Worker did not establish IPC before the RPC initialization timeout",
workerStderrTail,
)
: undefined;
teardownFailedInit();
reject(new RPCInitTimeoutError(RPC_INIT_TIMEOUT_MS));
reject(new RPCInitTimeoutError(RPC_INIT_TIMEOUT_MS, cause));
}, RPC_INIT_TIMEOUT_MS);

ipcServer = createServer((socket) => {
Expand Down Expand Up @@ -370,7 +399,7 @@ async function ensureRPC(): Promise<RPC> {
],
platform: process.platform,
arch: process.arch,
stdio: ["inherit", "inherit", "inherit"],
stdio: ["inherit", "inherit", "pipe"],
});
} catch (error) {
// `spawn` resolves the bare binary synchronously and can throw before
Expand All @@ -386,6 +415,12 @@ async function ensureRPC(): Promise<RPC> {
}

if (bareWorkerProc) {
getWorkerStderr(bareWorkerProc)?.on("data", (chunk) => {
const text = chunk.toString();
workerStderrTail = appendWorkerStderrTail(workerStderrTail, text);
process.stderr.write(chunk);
});

bareWorkerProc.on(
Comment thread
lauripiisang marked this conversation as resolved.
"exit",
(code: number | null, exitSignal: string | null) => {
Expand All @@ -397,15 +432,29 @@ async function ensureRPC(): Promise<RPC> {
);
return;
}
// Worker died before handshake — reject the init promise.
// Pre-handshake failures are rejected from "close" so stderr has
// drained before we assemble the startup error cause.
},
);

bareWorkerProc.on(
"close",
(...args: unknown[]) => {
if (settled) return;
const code = typeof args[0] === "number" ? args[0] : null;
const exitSignal = typeof args[1] === "string" ? args[1] : null;

// Worker died before handshake. Use close, not exit, so piped
// stderr has drained before we build the error cause.
settled = true;
clearTimeout(timer);
teardownFailedInit();
reject(
new RPCInitTimeoutError(
RPC_INIT_TIMEOUT_MS,
new Error(
`Worker process exited with code ${code} before IPC connection was established`,
createWorkerStartupError(
`Worker process exited with code ${code}, signal ${exitSignal} before IPC connection was established`,
workerStderrTail,
),
),
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Bare worker that fails before the IPC handshake, like a native addon dlopen error.

throw new Error('QVAC_REPRO_NATIVE_LOAD_ERROR: simulated dlopen failure before worker handshake')
55 changes: 55 additions & 0 deletions packages/sdk/test/unit/worker-startup-error.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import test from "brittle";
import path from "node:path";
import { fileURLToPath } from "node:url";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const nativeLoadErrorMarker = "QVAC_REPRO_NATIVE_LOAD_ERROR";

/**
 * Flattens an error and its direct cause into one searchable string.
 *
 * @param error - The error to describe; `undefined` yields an empty string.
 * @returns `"<message>\n<cause message>"`, where the cause part is empty
 *          unless `error.cause` is itself an Error. Only one level of
 *          cause is inspected.
 */
function collectErrorDetails(error: Error | undefined) {
  if (!error) {
    return "";
  }

  const nested = (error as { cause?: unknown }).cause;
  if (nested instanceof Error) {
    return `${error.message}\n${nested.message}`;
  }
  return `${error.message}\n`;
}

// Regression test: when the worker dies before the IPC handshake, the error
// surfaced by loadModel() should carry the worker's stderr output in its
// message or in its direct cause.
test("loadModel() startup failure includes worker stderr in RPC init error cause", async function (t) {
t.timeout(15_000);

// Point the worker path env var at a fixture script.
// NOTE(review): presumably the SDK reads QVAC_WORKER_PATH to choose the worker
// entry point, and the fixture throws an error containing the marker string
// before any handshake — confirm against the client and fixture sources.
process.env["QVAC_WORKER_PATH"] = path.resolve(
__dirname,
"fixtures/native-load-failure-worker.mjs",
);

// Dynamic imports run only after the env var above has been set.
const { loadModel } = await import("@/client/api/load-model");
const { close } = await import("@/client/rpc/rpc-client");

// Cleanup: close the client on a best-effort basis (errors are ignored) and
// remove the env override.
t.teardown(async () => {
try {
await close();
} catch {}
delete process.env["QVAC_WORKER_PATH"];
});

// Capture the rejection instead of letting it fail the test directly, so its
// name and details can be asserted on below.
let startupError: Error | undefined;
try {
await loadModel({
modelSrc: "/tmp/qvac-repro-model.gguf",
modelType: "llamacpp-completion",
});
t.fail("loadModel() resolved unexpectedly - expected worker startup failure");
} catch (error) {
startupError = error as Error;
}

t.ok(startupError, "expected loadModel() to reject");
// The failure is expected to be reported under the RPC_INIT_TIMEOUT error name.
t.is(
(startupError as { name?: string } | undefined)?.name,
"RPC_INIT_TIMEOUT",
`expected RPC_INIT_TIMEOUT, got name=${(startupError as { name?: string } | undefined)?.name}`,
);
// The marker string must appear in the error message or its direct cause.
t.ok(
collectErrorDetails(startupError).includes(nativeLoadErrorMarker),
"expected SDK error details to include worker stderr",
);
});
Loading