|
| 1 | +/** |
| 2 | + * Dynamic process manager. |
| 3 | + * |
| 4 | + * Manages persistent server processes for dynamic sites (NLPL, Node.js, etc.). |
| 5 | + * Each dynamic site gets: |
| 6 | + * - A unique port allocated from the range DYNAMIC_PORT_START..DYNAMIC_PORT_END |
| 7 | + * - A supervised child process that is restarted on crash (up to MAX_RESTARTS) |
| 8 | + * - Request proxying via http-proxy-middleware in the host router |
| 9 | + * |
| 10 | + * Supported runtimes: |
| 11 | + * nlpl — python src/main.py server.nlpl (requires NLPL_INTERPRETER_PATH) |
| 12 | + * node — node server.js (requires Node.js in PATH) |
| 13 | + * python — python server.py (requires Python 3 in PATH) |
| 14 | + * |
| 15 | + * Process lifecycle: |
| 16 | + * start → allocate port → spawn process → health check → mark ready |
| 17 | + * crash → log → wait backoff → restart (up to MAX_RESTARTS) |
| 18 | + * stop → SIGTERM → wait 5s → SIGKILL |
| 19 | + * scale → (future: multiple processes per site behind internal load balancer) |
| 20 | + * |
| 21 | + * Security: |
| 22 | + * - Processes run as a non-root user (DYNAMIC_PROCESS_USER, default: "nobody") |
| 23 | + * - Environment is sanitised — no credentials, no DB URLs passed to site processes |
| 24 | + * - Sites communicate via HTTP only — no Unix socket access |
| 25 | + * - Resource limits: MAX_MEMORY_MB, MAX_CPU_PCT per process |
| 26 | + * |
| 27 | + * This module is intentionally not a full container orchestrator. |
| 28 | + * For true isolation, wrap each process in a Docker container or |
| 29 | + * use a platform like Fly.io that provides per-process VMs. |
| 30 | + * This implementation is appropriate for trusted operators running |
| 31 | + * sites for known users, not untrusted arbitrary code. |
| 32 | + */ |
| 33 | + |
| 34 | +import { spawn, type ChildProcess } from "child_process"; |
| 35 | +import { createServer } from "net"; |
| 36 | +import path from "path"; |
| 37 | +import fs from "fs"; |
| 38 | +import http from "http"; |
| 39 | +import logger from "./logger"; |
| 40 | + |
| 41 | +// ── Configuration ───────────────────────────────────────────────────────────── |
| 42 | + |
| 43 | +const PORT_START = parseInt(process.env.DYNAMIC_PORT_START ?? "9000"); |
| 44 | +const PORT_END = parseInt(process.env.DYNAMIC_PORT_END ?? "9999"); |
| 45 | +const MAX_RESTARTS = parseInt(process.env.DYNAMIC_MAX_RESTARTS ?? "5"); |
| 46 | +const BACKOFF_BASE_MS = 2_000; |
| 47 | + |
| 48 | +// Path to the NLPL interpreter — the cloned NLPL repo's src/main.py |
| 49 | +const NLPL_INTERPRETER = process.env.NLPL_INTERPRETER_PATH ?? "/opt/nlpl/src/main.py"; |
| 50 | +// Python executable (python3 or python depending on the system) |
| 51 | +const PYTHON_BIN = process.env.PYTHON_BIN ?? "python3"; |
| 52 | + |
| 53 | +// ── Port pool ───────────────────────────────────────────────────────────────── |
| 54 | + |
| 55 | +const allocatedPorts = new Set<number>(); |
| 56 | + |
| 57 | +async function findFreePort(): Promise<number> { |
| 58 | + for (let port = PORT_START; port <= PORT_END; port++) { |
| 59 | + if (allocatedPorts.has(port)) continue; |
| 60 | + |
| 61 | + // Double-check the port isn't in use by something else |
| 62 | + const free = await new Promise<boolean>((resolve) => { |
| 63 | + const server = createServer(); |
| 64 | + server.listen(port, "127.0.0.1", () => { |
| 65 | + server.close(() => resolve(true)); |
| 66 | + }); |
| 67 | + server.on("error", () => resolve(false)); |
| 68 | + }); |
| 69 | + |
| 70 | + if (free) { |
| 71 | + allocatedPorts.add(port); |
| 72 | + return port; |
| 73 | + } |
| 74 | + } |
| 75 | + throw new Error(`No free ports in range ${PORT_START}–${PORT_END}`); |
| 76 | +} |
| 77 | + |
| 78 | +function releasePort(port: number): void { |
| 79 | + allocatedPorts.delete(port); |
| 80 | +} |
| 81 | + |
| 82 | +// ── Per-process log ring buffer ─────────────────────────────────────────────── |
| 83 | +// Stores the last LOG_BUFFER_SIZE lines of stdout+stderr per site process. |
| 84 | +// Never grows unboundedly — oldest lines are dropped when full. |
| 85 | + |
| 86 | +const LOG_BUFFER_SIZE = 500; |
| 87 | +const processLogs = new Map<number, string[]>(); // siteId → last N lines |
| 88 | + |
| 89 | +function appendProcessLog(siteId: number, line: string): void { |
| 90 | + if (!processLogs.has(siteId)) processLogs.set(siteId, []); |
| 91 | + const buf = processLogs.get(siteId)!; |
| 92 | + buf.push(`[${new Date().toISOString()}] ${line}`); |
| 93 | + if (buf.length > LOG_BUFFER_SIZE) buf.splice(0, buf.length - LOG_BUFFER_SIZE); |
| 94 | +} |
| 95 | + |
| 96 | +export function getProcessLogs(siteId: number, tail = 100): string[] { |
| 97 | + const buf = processLogs.get(siteId) ?? []; |
| 98 | + return buf.slice(-Math.min(tail, LOG_BUFFER_SIZE)); |
| 99 | +} |
| 100 | + |
| 101 | +function clearProcessLogs(siteId: number): void { |
| 102 | + processLogs.delete(siteId); |
| 103 | +} |
| 104 | + |
| 105 | +export type RuntimeType = "nlpl" | "node" | "python"; |
| 106 | + |
| 107 | +export interface ProcessEntry { |
| 108 | + siteId: number; |
| 109 | + siteDomain: string; |
| 110 | + port: number; |
| 111 | + runtime: RuntimeType; |
| 112 | + workDir: string; |
| 113 | + entryFile: string; |
| 114 | + process: ChildProcess | null; |
| 115 | + status: "starting" | "running" | "crashed" | "stopped"; |
| 116 | + restartCount: number; |
| 117 | + startedAt: Date | null; |
| 118 | + lastCrashAt: Date | null; |
| 119 | + pid: number | null; |
| 120 | +} |
| 121 | + |
| 122 | +const processes = new Map<number, ProcessEntry>(); // siteId → entry |
| 123 | + |
| 124 | +// ── Build the command for a given runtime ───────────────────────────────────── |
| 125 | + |
| 126 | +function buildCommand(entry: ProcessEntry): { cmd: string; args: string[] } { |
| 127 | + const entryPath = path.join(entry.workDir, entry.entryFile); |
| 128 | + |
| 129 | + switch (entry.runtime) { |
| 130 | + case "nlpl": |
| 131 | + return { |
| 132 | + cmd: PYTHON_BIN, |
| 133 | + args: [NLPL_INTERPRETER, entryPath], |
| 134 | + }; |
| 135 | + case "node": |
| 136 | + return { |
| 137 | + cmd: "node", |
| 138 | + args: [entryPath], |
| 139 | + }; |
| 140 | + case "python": |
| 141 | + return { |
| 142 | + cmd: PYTHON_BIN, |
| 143 | + args: [entryPath], |
| 144 | + }; |
| 145 | + } |
| 146 | +} |
| 147 | + |
| 148 | +// ── Spawn a process ─────────────────────────────────────────────────────────── |
| 149 | + |
| 150 | +async function spawnProcess(entry: ProcessEntry): Promise<void> { |
| 151 | + const { cmd, args } = buildCommand(entry); |
| 152 | + |
| 153 | + // Sanitised environment: site gets PORT and a subset of safe vars only |
| 154 | + // NEVER pass DATABASE_URL, REDIS_URL, API keys, or any credentials |
| 155 | + const env: NodeJS.ProcessEnv = { |
| 156 | + PORT: String(entry.port), |
| 157 | + NODE_ENV: "production", |
| 158 | + PATH: process.env.PATH ?? "/usr/local/bin:/usr/bin:/bin", |
| 159 | + HOME: process.env.HOME ?? "/tmp", |
| 160 | + // NLPL-specific |
| 161 | + NLPL_ENV: "production", |
| 162 | + SITE_DOMAIN: entry.siteDomain, |
| 163 | + }; |
| 164 | + |
| 165 | + logger.info( |
| 166 | + { siteId: entry.siteId, domain: entry.siteDomain, cmd, port: entry.port, runtime: entry.runtime }, |
| 167 | + "[process-manager] Spawning process", |
| 168 | + ); |
| 169 | + |
| 170 | + const child = spawn(cmd, args, { |
| 171 | + cwd: entry.workDir, |
| 172 | + env, |
| 173 | + stdio: ["ignore", "pipe", "pipe"], |
| 174 | + // uid/gid: could set to nobody (65534) but requires running as root first |
| 175 | + // For now, inherit the process user (should be non-root per Docker setup) |
| 176 | + }); |
| 177 | + |
| 178 | + entry.process = child; |
| 179 | + entry.pid = child.pid ?? null; |
| 180 | + entry.status = "starting"; |
| 181 | + entry.startedAt = new Date(); |
| 182 | + |
| 183 | + // Pipe stdout/stderr to logger AND ring buffer |
| 184 | + child.stdout?.on("data", (data: Buffer) => { |
| 185 | + const line = data.toString().trim(); |
| 186 | + appendProcessLog(entry.siteId, line); |
| 187 | + logger.debug({ siteId: entry.siteId, domain: entry.siteDomain }, `[site-process] ${line}`); |
| 188 | + }); |
| 189 | + child.stderr?.on("data", (data: Buffer) => { |
| 190 | + const line = data.toString().trim(); |
| 191 | + appendProcessLog(entry.siteId, `ERR: ${line}`); |
| 192 | + logger.warn({ siteId: entry.siteId, domain: entry.siteDomain }, `[site-process:err] ${line}`); |
| 193 | + }); |
| 194 | + |
| 195 | + child.on("exit", (code, signal) => { |
| 196 | + logger.warn( |
| 197 | + { siteId: entry.siteId, domain: entry.siteDomain, code, signal, restarts: entry.restartCount }, |
| 198 | + "[process-manager] Process exited", |
| 199 | + ); |
| 200 | + entry.status = "crashed"; |
| 201 | + entry.lastCrashAt = new Date(); |
| 202 | + entry.process = null; |
| 203 | + entry.pid = null; |
| 204 | + |
| 205 | + if (entry.restartCount < MAX_RESTARTS) { |
| 206 | + const delay = BACKOFF_BASE_MS * Math.pow(2, entry.restartCount); |
| 207 | + entry.restartCount++; |
| 208 | + logger.info( |
| 209 | + { siteId: entry.siteId, delayMs: delay, attempt: entry.restartCount }, |
| 210 | + "[process-manager] Scheduling restart", |
| 211 | + ); |
| 212 | + setTimeout(() => { |
| 213 | + if (processes.has(entry.siteId)) { |
| 214 | + spawnProcess(entry).catch((err) => |
| 215 | + logger.error({ err, siteId: entry.siteId }, "[process-manager] Restart failed"), |
| 216 | + ); |
| 217 | + } |
| 218 | + }, delay); |
| 219 | + } else { |
| 220 | + logger.error( |
| 221 | + { siteId: entry.siteId, domain: entry.siteDomain }, |
| 222 | + "[process-manager] Max restarts reached — giving up", |
| 223 | + ); |
| 224 | + entry.status = "stopped"; |
| 225 | + } |
| 226 | + }); |
| 227 | + |
| 228 | + // Wait for the process to start listening on PORT |
| 229 | + await waitForPort(entry.port, 15_000); |
| 230 | + entry.status = "running"; |
| 231 | + |
| 232 | + logger.info( |
| 233 | + { siteId: entry.siteId, domain: entry.siteDomain, port: entry.port, pid: entry.pid }, |
| 234 | + "[process-manager] Process ready", |
| 235 | + ); |
| 236 | +} |
| 237 | + |
| 238 | +// ── Wait for a process to bind a port ───────────────────────────────────────── |
| 239 | + |
| 240 | +function waitForPort(port: number, timeoutMs: number): Promise<void> { |
| 241 | + return new Promise((resolve, reject) => { |
| 242 | + const deadline = Date.now() + timeoutMs; |
| 243 | + |
| 244 | + function probe() { |
| 245 | + const sock = new http.ClientRequest({ host: "127.0.0.1", port, path: "/", method: "HEAD" }); |
| 246 | + sock.on("response", () => { sock.destroy(); resolve(); }); |
| 247 | + sock.on("error", () => { |
| 248 | + if (Date.now() > deadline) { |
| 249 | + reject(new Error(`Process did not bind port ${port} within ${timeoutMs}ms`)); |
| 250 | + } else { |
| 251 | + setTimeout(probe, 500); |
| 252 | + } |
| 253 | + }); |
| 254 | + sock.end(); |
| 255 | + } |
| 256 | + |
| 257 | + probe(); |
| 258 | + }); |
| 259 | +} |
| 260 | + |
| 261 | +// ── Public API ──────────────────────────────────────────────────────────────── |
| 262 | + |
| 263 | +export async function startSiteProcess(opts: { |
| 264 | + siteId: number; |
| 265 | + siteDomain: string; |
| 266 | + runtime: RuntimeType; |
| 267 | + workDir: string; |
| 268 | + entryFile: string; |
| 269 | +}): Promise<{ port: number }> { |
| 270 | + const existing = processes.get(opts.siteId); |
| 271 | + if (existing && (existing.status === "running" || existing.status === "starting")) { |
| 272 | + return { port: existing.port }; |
| 273 | + } |
| 274 | + |
| 275 | + // Validate that the entry file exists |
| 276 | + const entryPath = path.join(opts.workDir, opts.entryFile); |
| 277 | + if (!fs.existsSync(entryPath)) { |
| 278 | + throw new Error(`Entry file not found: ${entryPath}`); |
| 279 | + } |
| 280 | + |
| 281 | + // Validate NLPL interpreter is available |
| 282 | + if (opts.runtime === "nlpl" && !fs.existsSync(NLPL_INTERPRETER)) { |
| 283 | + throw new Error( |
| 284 | + `NLPL interpreter not found at ${NLPL_INTERPRETER}. ` + |
| 285 | + `Set NLPL_INTERPRETER_PATH or run: git clone https://github.com/Zajfan/NLPL /opt/nlpl`, |
| 286 | + ); |
| 287 | + } |
| 288 | + |
| 289 | + const port = await findFreePort(); |
| 290 | + |
| 291 | + const entry: ProcessEntry = { |
| 292 | + siteId: opts.siteId, |
| 293 | + siteDomain: opts.siteDomain, |
| 294 | + port, |
| 295 | + runtime: opts.runtime, |
| 296 | + workDir: opts.workDir, |
| 297 | + entryFile: opts.entryFile, |
| 298 | + process: null, |
| 299 | + status: "starting", |
| 300 | + restartCount: 0, |
| 301 | + startedAt: null, |
| 302 | + lastCrashAt: null, |
| 303 | + pid: null, |
| 304 | + }; |
| 305 | + |
| 306 | + processes.set(opts.siteId, entry); |
| 307 | + |
| 308 | + await spawnProcess(entry); |
| 309 | + |
| 310 | + return { port }; |
| 311 | +} |
| 312 | + |
| 313 | +export function stopSiteProcess(siteId: number): void { |
| 314 | + const entry = processes.get(siteId); |
| 315 | + if (!entry) return; |
| 316 | + |
| 317 | + logger.info({ siteId, domain: entry.siteDomain }, "[process-manager] Stopping process"); |
| 318 | + |
| 319 | + entry.status = "stopped"; |
| 320 | + const proc = entry.process; |
| 321 | + if (proc) { |
| 322 | + entry.process = null; |
| 323 | + proc.kill("SIGTERM"); |
| 324 | + // Force-kill after 5 seconds if it doesn't exit cleanly |
| 325 | + setTimeout(() => { |
| 326 | + try { proc.kill("SIGKILL"); } catch { /* already dead */ } |
| 327 | + }, 5_000); |
| 328 | + } |
| 329 | + |
| 330 | + releasePort(entry.port); |
| 331 | + processes.delete(siteId); |
| 332 | + // Keep logs for 5 minutes after stop so the UI can still show them |
| 333 | + setTimeout(() => clearProcessLogs(siteId), 5 * 60_000); |
| 334 | +} |
| 335 | + |
| 336 | +export function getSiteProcess(siteId: number): ProcessEntry | undefined { |
| 337 | + return processes.get(siteId); |
| 338 | +} |
| 339 | + |
| 340 | +export function getSiteProxyTarget(siteId: number): string | null { |
| 341 | + const entry = processes.get(siteId); |
| 342 | + if (!entry || entry.status !== "running") return null; |
| 343 | + return `http://127.0.0.1:${entry.port}`; |
| 344 | +} |
| 345 | + |
| 346 | +export function getAllProcessStats() { |
| 347 | + return [...processes.values()].map((e) => ({ |
| 348 | + siteId: e.siteId, |
| 349 | + domain: e.siteDomain, |
| 350 | + runtime: e.runtime, |
| 351 | + port: e.port, |
| 352 | + status: e.status, |
| 353 | + pid: e.pid, |
| 354 | + restartCount: e.restartCount, |
| 355 | + startedAt: e.startedAt?.toISOString(), |
| 356 | + lastCrashAt: e.lastCrashAt?.toISOString(), |
| 357 | + })); |
| 358 | +} |
| 359 | + |
| 360 | +export function stopAllProcesses(): void { |
| 361 | + for (const siteId of processes.keys()) { |
| 362 | + stopSiteProcess(siteId); |
| 363 | + } |
| 364 | +} |
0 commit comments