Skip to content

Commit 1c42f7a

Browse files
committed
perf(windows): use persistent whisper-server to eliminate cold-start
Replace the spawn-per-transcription approach (whisper-cli.exe) with a persistent whisper-server.exe process on non-macOS platforms. The server loads the model once at app launch and accepts audio via HTTP POST on localhost, eliminating the ~500ms model-loading overhead on every transcription.

Additionally switches from beam-search decoding (--best-of 5 --beam-size 5) to greedy decoding, saving ~600ms per transcription. The LLM correction layer compensates for any quality difference.

- Add WhisperServer class with lifecycle management and HTTP-based readiness polling
- Server auto-restarts when the user changes whisper models
- Falls back to CLI mode if the server fails to start
- macOS keeps the existing CLI approach (Metal GPU makes cold-start negligible)

Benchmark (JFK 11s audio, small model, 12 threads):
Before: ~4,500ms (CLI + beam search)
After: ~3,300ms (server + greedy)
1 parent dc6ce0a commit 1c42f7a

File tree

4 files changed

+467
-6
lines changed

4 files changed

+467
-6
lines changed

src/main/app.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { ConfigManager } from "./config/manager";
66
import { createSecretStore } from "./config/secrets";
77
import { ModelManager } from "./models/manager";
88
import { AudioRecorder } from "./audio/recorder";
9-
import { transcribe } from "./audio/whisper";
9+
import { transcribe, initWhisperServer, shutdownWhisperServer } from "./audio/whisper";
1010
import { createLlmProvider } from "./llm/factory";
1111
import { Pipeline } from "./pipeline";
1212
import { ShortcutManager } from "./shortcuts/manager";
@@ -177,6 +177,16 @@ function reloadConfig(): void {
177177
if (!busy) {
178178
setupPipeline();
179179
}
180+
181+
// Restart the whisper-server when the model changes (server auto-skips
182+
// if the model is the same; on macOS this is a no-op).
183+
const newModelPath = config.whisper.model
184+
? modelManager.getModelPath(config.whisper.model)
185+
: "";
186+
initWhisperServer(newModelPath).catch((err) => {
187+
slog.error("Failed to restart whisper-server after config change", err);
188+
});
189+
180190
shortcutManager?.registerShortcutKeys();
181191
shortcutManager?.updateHud();
182192
updateTrayConfig(config);
@@ -272,6 +282,17 @@ app.whenReady().then(async () => {
272282
});
273283

274284
setupPipeline();
285+
286+
// Start persistent whisper-server on non-macOS platforms to eliminate
287+
// the cold-start model-loading overhead on each transcription.
288+
{
289+
const whisperModel = configManager.load().whisper.model;
290+
const initialModelPath = whisperModel ? modelManager.getModelPath(whisperModel) : "";
291+
initWhisperServer(initialModelPath).catch((err) => {
292+
slog.error("Failed to start whisper-server at launch", err);
293+
});
294+
}
295+
275296
historyManager.cleanup();
276297

277298
// Clean up orphaned audio files
@@ -434,6 +455,7 @@ app.on("activate", () => {
434455

435456
app.on("before-quit", () => {
436457
shortcutManager?.stop();
458+
shutdownWhisperServer().catch(() => {});
437459
destroyTray();
438460
for (const win of BrowserWindow.getAllWindows()) {
439461
if (!win.isDestroyed()) win.destroy();

src/main/audio/whisper-server.ts

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
import { spawn, type ChildProcess } from "child_process";
2+
import * as fs from "fs";
3+
import * as http from "http";
4+
import * as net from "net";
5+
import * as path from "path";
6+
import * as os from "os";
7+
import { app } from "electron";
8+
import log from "electron-log/main";
9+
10+
const slog = log.scope("WhisperServer");
11+
12+
const appRoot = app.getAppPath().replace("app.asar", "app.asar.unpacked");
13+
const WHISPER_CPP_DIR = path.join(appRoot, "node_modules/whisper-node/lib/whisper.cpp");
14+
const SERVER_BIN = path.join(WHISPER_CPP_DIR, "whisper-server.exe");
15+
16+
const STARTUP_TIMEOUT_MS = 15_000;
17+
const INFERENCE_TIMEOUT_MS = 30_000;
18+
19+
// On Windows/Linux (CPU-only), using ~75% of cores gives the best throughput
20+
// without starving the OS. Minimum 4 to avoid slowdowns on low-core machines.
21+
const WHISPER_THREADS = Math.max(4, Math.floor(os.cpus().length * 0.75));
22+
23+
function findFreePort(): Promise<number> {
24+
return new Promise((resolve, reject) => {
25+
const server = net.createServer();
26+
server.listen(0, "127.0.0.1", () => {
27+
const addr = server.address() as net.AddressInfo;
28+
server.close(() => resolve(addr.port));
29+
});
30+
server.on("error", reject);
31+
});
32+
}
33+
34+
export class WhisperServer {
35+
private proc: ChildProcess | null = null;
36+
private port = 0;
37+
private modelPath = "";
38+
private ready = false;
39+
private starting = false;
40+
41+
async start(modelPath: string): Promise<void> {
42+
if (this.proc && this.modelPath === modelPath) return;
43+
if (this.proc) await this.stop();
44+
45+
this.modelPath = modelPath;
46+
47+
if (!fs.existsSync(SERVER_BIN)) {
48+
throw new Error(`whisper-server binary not found: ${SERVER_BIN}`);
49+
}
50+
if (!fs.existsSync(modelPath)) {
51+
throw new Error(`Whisper model not found: ${modelPath}`);
52+
}
53+
54+
this.starting = true;
55+
this.ready = false;
56+
this.port = await findFreePort();
57+
58+
slog.info("Starting whisper-server", { port: this.port, model: path.basename(modelPath), threads: WHISPER_THREADS });
59+
60+
this.proc = spawn(SERVER_BIN, [
61+
"--host", "127.0.0.1",
62+
"--port", String(this.port),
63+
"-m", modelPath,
64+
"-t", String(WHISPER_THREADS),
65+
"--no-gpu",
66+
], {
67+
cwd: WHISPER_CPP_DIR,
68+
stdio: "ignore",
69+
windowsHide: true,
70+
});
71+
72+
this.proc.on("exit", (code, signal) => {
73+
// Guard: during app shutdown the console transport may already be torn
74+
// down, causing EPIPE if we try to log here.
75+
try { slog.warn("whisper-server exited", { code, signal }); } catch { /* ignore */ }
76+
this.proc = null;
77+
this.ready = false;
78+
this.starting = false;
79+
});
80+
81+
this.proc.on("error", (err) => {
82+
try { slog.error("whisper-server spawn error", err); } catch { /* ignore */ }
83+
this.proc = null;
84+
this.ready = false;
85+
this.starting = false;
86+
});
87+
88+
await this.waitForReady();
89+
}
90+
91+
async stop(): Promise<void> {
92+
if (!this.proc) return;
93+
94+
try { slog.info("Stopping whisper-server"); } catch { /* ignore */ }
95+
const proc = this.proc;
96+
this.proc = null;
97+
this.ready = false;
98+
this.starting = false;
99+
100+
proc.kill("SIGTERM");
101+
102+
// Give it a moment to exit gracefully, then force kill
103+
await new Promise<void>((resolve) => {
104+
const timeout = setTimeout(() => {
105+
try { proc.kill("SIGKILL"); } catch { /* already dead */ }
106+
resolve();
107+
}, 2000);
108+
proc.on("exit", () => {
109+
clearTimeout(timeout);
110+
resolve();
111+
});
112+
});
113+
}
114+
115+
isReady(): boolean {
116+
return this.ready && this.proc !== null;
117+
}
118+
119+
async transcribe(wavPath: string, language: string, prompt: string): Promise<string> {
120+
if (!this.ready || !this.proc) {
121+
throw new Error("whisper-server is not running");
122+
}
123+
124+
const fileBuffer = fs.readFileSync(wavPath);
125+
const boundary = `----VoxBoundary${Date.now()}`;
126+
127+
const fields: Record<string, string> = {
128+
response_format: "json",
129+
};
130+
if (language && language !== "auto") {
131+
fields.language = language;
132+
}
133+
if (prompt) {
134+
fields.prompt = prompt;
135+
}
136+
137+
// Build multipart body
138+
const parts: Buffer[] = [];
139+
140+
for (const [key, value] of Object.entries(fields)) {
141+
parts.push(Buffer.from(
142+
`--${boundary}\r\nContent-Disposition: form-data; name="${key}"\r\n\r\n${value}\r\n`
143+
));
144+
}
145+
146+
parts.push(Buffer.from(
147+
`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="audio.wav"\r\nContent-Type: audio/wav\r\n\r\n`
148+
));
149+
parts.push(fileBuffer);
150+
parts.push(Buffer.from(`\r\n--${boundary}--\r\n`));
151+
152+
const body = Buffer.concat(parts);
153+
154+
const responseBody = await new Promise<string>((resolve, reject) => {
155+
const req = http.request({
156+
hostname: "127.0.0.1",
157+
port: this.port,
158+
path: "/inference",
159+
method: "POST",
160+
headers: {
161+
"Content-Type": `multipart/form-data; boundary=${boundary}`,
162+
"Content-Length": body.length,
163+
},
164+
timeout: INFERENCE_TIMEOUT_MS,
165+
}, (res) => {
166+
const chunks: Buffer[] = [];
167+
res.on("data", (chunk: Buffer) => chunks.push(chunk));
168+
res.on("end", () => {
169+
const text = Buffer.concat(chunks).toString("utf-8");
170+
if (res.statusCode !== 200) {
171+
reject(new Error(`whisper-server returned ${res.statusCode}: ${text}`));
172+
return;
173+
}
174+
resolve(text);
175+
});
176+
});
177+
178+
req.on("error", reject);
179+
req.on("timeout", () => {
180+
req.destroy(new Error("whisper-server inference timed out"));
181+
});
182+
183+
req.write(body);
184+
req.end();
185+
});
186+
187+
const parsed = JSON.parse(responseBody) as { text?: string };
188+
return (parsed.text ?? "").trim();
189+
}
190+
191+
private async waitForReady(): Promise<void> {
192+
const deadline = Date.now() + STARTUP_TIMEOUT_MS;
193+
const POLL_INTERVAL_MS = 300;
194+
195+
while (Date.now() < deadline) {
196+
if (!this.proc) {
197+
throw new Error("whisper-server exited during startup");
198+
}
199+
200+
const alive = await this.httpProbe();
201+
if (alive) {
202+
this.ready = true;
203+
this.starting = false;
204+
slog.info("whisper-server is ready", { port: this.port });
205+
return;
206+
}
207+
208+
await new Promise<void>((r) => setTimeout(r, POLL_INTERVAL_MS));
209+
}
210+
211+
this.starting = false;
212+
throw new Error(`whisper-server failed to start within ${STARTUP_TIMEOUT_MS}ms`);
213+
}
214+
215+
/** Send a lightweight HTTP request to check if the server is accepting connections. */
216+
private httpProbe(): Promise<boolean> {
217+
return new Promise<boolean>((resolve) => {
218+
const req = http.get({
219+
hostname: "127.0.0.1",
220+
port: this.port,
221+
path: "/",
222+
timeout: 500,
223+
}, (res) => {
224+
res.resume();
225+
resolve(true);
226+
});
227+
req.on("error", () => resolve(false));
228+
req.on("timeout", () => { req.destroy(); resolve(false); });
229+
});
230+
}
231+
}

src/main/audio/whisper.ts

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ import * as fs from "fs";
55
import * as path from "path";
66
import * as os from "os";
77
import { buildWhisperPrompt, buildWhisperArgs } from "../../shared/constants";
8+
import { WhisperServer } from "./whisper-server";
9+
import log from "electron-log/main";
10+
11+
const slog = log.scope("Whisper");
812

913
export interface TranscriptionResult {
1014
text: string;
@@ -19,6 +23,42 @@ const WHISPER_BIN = path.join(
1923
process.platform === "win32" ? "whisper-cli.exe" : "main"
2024
);
2125

26+
// On non-macOS platforms, use a persistent whisper-server process to avoid
27+
// the ~500ms cold-start (model loading) on every transcription.
28+
const USE_SERVER = process.platform !== "darwin";
29+
let server: WhisperServer | null = null;
30+
31+
/**
32+
* Start the persistent whisper-server process (non-macOS only).
33+
* Call this once at app launch. No-ops on macOS.
34+
*/
35+
export async function initWhisperServer(modelPath: string): Promise<void> {
36+
if (!USE_SERVER) return;
37+
if (!modelPath || !fs.existsSync(modelPath)) {
38+
slog.warn("Skipping whisper-server start: no model", modelPath);
39+
return;
40+
}
41+
42+
try {
43+
if (!server) server = new WhisperServer();
44+
await server.start(modelPath);
45+
} catch (err) {
46+
slog.error("Failed to start whisper-server, falling back to CLI", err);
47+
server = null;
48+
}
49+
}
50+
51+
/**
52+
* Stop the persistent whisper-server process.
53+
* Call this on app quit.
54+
*/
55+
export async function shutdownWhisperServer(): Promise<void> {
56+
if (server) {
57+
await server.stop();
58+
server = null;
59+
}
60+
}
61+
2262
export async function transcribe(
2363
audioBuffer: Float32Array,
2464
sampleRate: number,
@@ -46,8 +86,15 @@ export async function transcribe(
4686
&& speechLanguages.length > 0
4787
? speechLanguages[0]
4888
: whisperArgs.language;
49-
const stdout = await runWhisper(modelPath, tempPath, prompt, language, temperature);
50-
const text = parseWhisperOutput(stdout);
89+
90+
let text: string;
91+
92+
if (USE_SERVER && server?.isReady()) {
93+
text = await server.transcribe(tempPath, language, prompt);
94+
} else {
95+
const stdout = await runWhisperCli(modelPath, tempPath, prompt, language, temperature);
96+
text = parseWhisperOutput(stdout);
97+
}
5198

5299
return { text };
53100
} finally {
@@ -64,14 +111,12 @@ const WHISPER_THREADS = process.platform === "darwin"
64111
? 4
65112
: Math.max(4, Math.floor(os.cpus().length * 0.75));
66113

67-
function runWhisper(modelPath: string, filePath: string, prompt: string, language = "auto", temperature?: number): Promise<string> {
114+
function runWhisperCli(modelPath: string, filePath: string, prompt: string, language = "auto", temperature?: number): Promise<string> {
68115
const args = [
69116
"-t", String(WHISPER_THREADS),
70117
"-l", language,
71118
"-m", modelPath,
72119
"-f", filePath,
73-
"--best-of", "5",
74-
"--beam-size", "5",
75120
"--entropy-thold", "2.0",
76121
"--prompt", prompt,
77122
];

0 commit comments

Comments
 (0)