Skip to content

Commit 4377c13

Browse files
author
The No Hands Company
committed
feat: NLPL dynamic site hosting — process manager, frontend panel, examples
Backend (processManager.ts) - Full dynamic process lifecycle: start → port allocation → spawn → health check → ready - Ring buffer log capture: last 500 lines per process, accessible via API - Exponential backoff restarts: 2s → 4s → 8s → ... up to MAX_RESTARTS (default 5) - Supported runtimes: nlpl (python3 + NLPL_INTERPRETER_PATH), node, python - Sanitised process env: PORT, NODE_ENV, SITE_DOMAIN, NLPL_ENV only — no credentials - Port pool: DYNAMIC_PORT_START–DYNAMIC_PORT_END, double-checked for actual availability - SIGTERM → 5s → SIGKILL on stop; logs retained 5 minutes after stop - getAllProcessStats(), getProcessLogs(), getSiteProxyTarget() public API - stopAllProcesses() called on graceful shutdown Backend routes (nlpl.ts) - POST /api/sites/:id/nlpl/start — extract files from object storage, spawn process - POST /api/sites/:id/nlpl/stop — SIGTERM process, release port - GET /api/sites/:id/nlpl/status — live status, port, pid, restart count - GET /api/sites/:id/nlpl/logs?tail=N — ring buffer tail (max 500) - GET /api/nlpl/runtime-info — interpreter availability, version, port range - GET /api/admin/processes — all running processes (requireAdmin) - Route registered in index.ts Host router - Dynamic sites (nlpl/node/python/dynamic) proxied to process port - 503 maintenance page if process not yet started - 502 bad gateway on proxy error Frontend (NlplPanel.tsx) - Status badge with animated dot: running/starting/crashed/stopped - Process detail grid: PID, port, uptime, restart count - Start/stop/restart buttons with loading state - Entry file input (defaults to server.nlpl) - Collapsible log viewer polling /nlpl/logs every 3s, auto-scroll, colour-coded - NLPL interpreter availability warning with install instructions - Runtime version footer (NLPL version, Python version, port range) - Wired into DeploySite for siteType nlpl|dynamic Examples (docs/nlpl-examples/) - hello-world/server.nlpl: minimal one-function HTTP server - router/server.nlpl: path-based routing, JSON API response - README.md: deployment guide, env vars, operator setup, limitations Config - .env.example: NLPL_INTERPRETER_PATH, PYTHON_BIN, DYNAMIC_PORT_START, DYNAMIC_PORT_END, DYNAMIC_MAX_RESTARTS documented - migrations/0000: site_type enum updated to include 'nlpl' - lib/db/src/schema/sites.ts: 'nlpl' added to siteTypeEnum
1 parent 0b75a27 commit 4377c13

File tree

13 files changed

+1267
-2
lines changed

13 files changed

+1267
-2
lines changed

.env.example

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,20 @@ EMAIL_FROM_NAME=FedHost
203203
ANALYTICS_RETENTION_DAYS=90
204204
FORM_RETENTION_DAYS=365
205205
AUDIT_LOG_RETENTION_DAYS=365
206+
207+
# ── Dynamic Site Hosting (NLPL / Node.js process management) ──────────────────
208+
# Path to the NLPL interpreter (src/main.py in the NLPL repo).
209+
# Install: git clone https://github.com/Zajfan/NLPL /opt/nlpl
210+
NLPL_INTERPRETER_PATH=/opt/nlpl/src/main.py
211+
212+
# Python executable used to run the NLPL interpreter
213+
PYTHON_BIN=python3
214+
215+
# Port range allocated to dynamic site processes.
216+
# Each running site gets one port from this range.
217+
# Ensure these ports are NOT exposed externally — only the Node proxy accesses them.
218+
DYNAMIC_PORT_START=9000
219+
DYNAMIC_PORT_END=9999
220+
221+
# Maximum restarts before a crashed process is abandoned (default: 5)
222+
DYNAMIC_MAX_RESTARTS=5

artifacts/api-server/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { startGossipPusher, stopGossipPusher } from "./routes/gossip";
88
import { getRedisClient, closeRedis } from "./lib/redis";
99
import { startSyncRetryQueue, stopSyncRetryQueue } from "./lib/syncRetryQueue";
1010
import { startAcmeRenewalScheduler, stopAcmeRenewalScheduler } from "./lib/acme";
11+
import { stopAllProcesses } from "./lib/processManager";
1112
import { startSiteHealthMonitor, stopSiteHealthMonitor } from "./lib/siteHealthMonitor";
1213
import { startMetricsCollector, stopMetricsCollector } from "./lib/metricsCollector";
1314
import { startWebhookRetryProcessor, stopWebhookRetryProcessor } from "./lib/webhooks";
Lines changed: 364 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,364 @@
1+
/**
2+
* Dynamic process manager.
3+
*
4+
* Manages persistent server processes for dynamic sites (NLPL, Node.js, etc.).
5+
* Each dynamic site gets:
6+
* - A unique port allocated from the range DYNAMIC_PORT_START..DYNAMIC_PORT_END
7+
* - A supervised child process that is restarted on crash (up to MAX_RESTARTS)
8+
* - Request proxying via http-proxy-middleware in the host router
9+
*
10+
* Supported runtimes:
11+
* nlpl — python src/main.py server.nlpl (requires NLPL_INTERPRETER_PATH)
12+
* node — node server.js (requires Node.js in PATH)
13+
* python — python server.py (requires Python 3 in PATH)
14+
*
15+
* Process lifecycle:
16+
* start → allocate port → spawn process → health check → mark ready
17+
* crash → log → wait backoff → restart (up to MAX_RESTARTS)
18+
* stop → SIGTERM → wait 5s → SIGKILL
19+
* scale → (future: multiple processes per site behind internal load balancer)
20+
*
21+
* Security:
22+
* - Processes run as a non-root user (DYNAMIC_PROCESS_USER, default: "nobody")
23+
* - Environment is sanitised — no credentials, no DB URLs passed to site processes
24+
* - Sites communicate via HTTP only — no Unix socket access
25+
* - Resource limits: MAX_MEMORY_MB, MAX_CPU_PCT per process
26+
*
27+
* This module is intentionally not a full container orchestrator.
28+
* For true isolation, wrap each process in a Docker container or
29+
* use a platform like Fly.io that provides per-process VMs.
30+
* This implementation is appropriate for trusted operators running
31+
* sites for known users, not untrusted arbitrary code.
32+
*/
33+
34+
import { spawn, type ChildProcess } from "child_process";
35+
import { createServer } from "net";
36+
import path from "path";
37+
import fs from "fs";
38+
import http from "http";
39+
import logger from "./logger";
40+
41+
// ── Configuration ─────────────────────────────────────────────────────────────
42+
43+
const PORT_START = parseInt(process.env.DYNAMIC_PORT_START ?? "9000");
44+
const PORT_END = parseInt(process.env.DYNAMIC_PORT_END ?? "9999");
45+
const MAX_RESTARTS = parseInt(process.env.DYNAMIC_MAX_RESTARTS ?? "5");
46+
const BACKOFF_BASE_MS = 2_000;
47+
48+
// Path to the NLPL interpreter — the cloned NLPL repo's src/main.py
49+
const NLPL_INTERPRETER = process.env.NLPL_INTERPRETER_PATH ?? "/opt/nlpl/src/main.py";
50+
// Python executable (python3 or python depending on the system)
51+
const PYTHON_BIN = process.env.PYTHON_BIN ?? "python3";
52+
53+
// ── Port pool ─────────────────────────────────────────────────────────────────
54+
55+
const allocatedPorts = new Set<number>();
56+
57+
async function findFreePort(): Promise<number> {
58+
for (let port = PORT_START; port <= PORT_END; port++) {
59+
if (allocatedPorts.has(port)) continue;
60+
61+
// Double-check the port isn't in use by something else
62+
const free = await new Promise<boolean>((resolve) => {
63+
const server = createServer();
64+
server.listen(port, "127.0.0.1", () => {
65+
server.close(() => resolve(true));
66+
});
67+
server.on("error", () => resolve(false));
68+
});
69+
70+
if (free) {
71+
allocatedPorts.add(port);
72+
return port;
73+
}
74+
}
75+
throw new Error(`No free ports in range ${PORT_START}${PORT_END}`);
76+
}
77+
78+
function releasePort(port: number): void {
79+
allocatedPorts.delete(port);
80+
}
81+
82+
// ── Per-process log ring buffer ───────────────────────────────────────────────
83+
// Stores the last LOG_BUFFER_SIZE lines of stdout+stderr per site process.
84+
// Never grows unboundedly — oldest lines are dropped when full.
85+
86+
const LOG_BUFFER_SIZE = 500;
87+
const processLogs = new Map<number, string[]>(); // siteId → last N lines
88+
89+
function appendProcessLog(siteId: number, line: string): void {
90+
if (!processLogs.has(siteId)) processLogs.set(siteId, []);
91+
const buf = processLogs.get(siteId)!;
92+
buf.push(`[${new Date().toISOString()}] ${line}`);
93+
if (buf.length > LOG_BUFFER_SIZE) buf.splice(0, buf.length - LOG_BUFFER_SIZE);
94+
}
95+
96+
export function getProcessLogs(siteId: number, tail = 100): string[] {
97+
const buf = processLogs.get(siteId) ?? [];
98+
return buf.slice(-Math.min(tail, LOG_BUFFER_SIZE));
99+
}
100+
101+
function clearProcessLogs(siteId: number): void {
102+
processLogs.delete(siteId);
103+
}
104+
105+
export type RuntimeType = "nlpl" | "node" | "python";
106+
107+
export interface ProcessEntry {
108+
siteId: number;
109+
siteDomain: string;
110+
port: number;
111+
runtime: RuntimeType;
112+
workDir: string;
113+
entryFile: string;
114+
process: ChildProcess | null;
115+
status: "starting" | "running" | "crashed" | "stopped";
116+
restartCount: number;
117+
startedAt: Date | null;
118+
lastCrashAt: Date | null;
119+
pid: number | null;
120+
}
121+
122+
const processes = new Map<number, ProcessEntry>(); // siteId → entry
123+
124+
// ── Build the command for a given runtime ─────────────────────────────────────
125+
126+
function buildCommand(entry: ProcessEntry): { cmd: string; args: string[] } {
127+
const entryPath = path.join(entry.workDir, entry.entryFile);
128+
129+
switch (entry.runtime) {
130+
case "nlpl":
131+
return {
132+
cmd: PYTHON_BIN,
133+
args: [NLPL_INTERPRETER, entryPath],
134+
};
135+
case "node":
136+
return {
137+
cmd: "node",
138+
args: [entryPath],
139+
};
140+
case "python":
141+
return {
142+
cmd: PYTHON_BIN,
143+
args: [entryPath],
144+
};
145+
}
146+
}
147+
148+
// ── Spawn a process ───────────────────────────────────────────────────────────
149+
150+
async function spawnProcess(entry: ProcessEntry): Promise<void> {
151+
const { cmd, args } = buildCommand(entry);
152+
153+
// Sanitised environment: site gets PORT and a subset of safe vars only
154+
// NEVER pass DATABASE_URL, REDIS_URL, API keys, or any credentials
155+
const env: NodeJS.ProcessEnv = {
156+
PORT: String(entry.port),
157+
NODE_ENV: "production",
158+
PATH: process.env.PATH ?? "/usr/local/bin:/usr/bin:/bin",
159+
HOME: process.env.HOME ?? "/tmp",
160+
// NLPL-specific
161+
NLPL_ENV: "production",
162+
SITE_DOMAIN: entry.siteDomain,
163+
};
164+
165+
logger.info(
166+
{ siteId: entry.siteId, domain: entry.siteDomain, cmd, port: entry.port, runtime: entry.runtime },
167+
"[process-manager] Spawning process",
168+
);
169+
170+
const child = spawn(cmd, args, {
171+
cwd: entry.workDir,
172+
env,
173+
stdio: ["ignore", "pipe", "pipe"],
174+
// uid/gid: could set to nobody (65534) but requires running as root first
175+
// For now, inherit the process user (should be non-root per Docker setup)
176+
});
177+
178+
entry.process = child;
179+
entry.pid = child.pid ?? null;
180+
entry.status = "starting";
181+
entry.startedAt = new Date();
182+
183+
// Pipe stdout/stderr to logger AND ring buffer
184+
child.stdout?.on("data", (data: Buffer) => {
185+
const line = data.toString().trim();
186+
appendProcessLog(entry.siteId, line);
187+
logger.debug({ siteId: entry.siteId, domain: entry.siteDomain }, `[site-process] ${line}`);
188+
});
189+
child.stderr?.on("data", (data: Buffer) => {
190+
const line = data.toString().trim();
191+
appendProcessLog(entry.siteId, `ERR: ${line}`);
192+
logger.warn({ siteId: entry.siteId, domain: entry.siteDomain }, `[site-process:err] ${line}`);
193+
});
194+
195+
child.on("exit", (code, signal) => {
196+
logger.warn(
197+
{ siteId: entry.siteId, domain: entry.siteDomain, code, signal, restarts: entry.restartCount },
198+
"[process-manager] Process exited",
199+
);
200+
entry.status = "crashed";
201+
entry.lastCrashAt = new Date();
202+
entry.process = null;
203+
entry.pid = null;
204+
205+
if (entry.restartCount < MAX_RESTARTS) {
206+
const delay = BACKOFF_BASE_MS * Math.pow(2, entry.restartCount);
207+
entry.restartCount++;
208+
logger.info(
209+
{ siteId: entry.siteId, delayMs: delay, attempt: entry.restartCount },
210+
"[process-manager] Scheduling restart",
211+
);
212+
setTimeout(() => {
213+
if (processes.has(entry.siteId)) {
214+
spawnProcess(entry).catch((err) =>
215+
logger.error({ err, siteId: entry.siteId }, "[process-manager] Restart failed"),
216+
);
217+
}
218+
}, delay);
219+
} else {
220+
logger.error(
221+
{ siteId: entry.siteId, domain: entry.siteDomain },
222+
"[process-manager] Max restarts reached — giving up",
223+
);
224+
entry.status = "stopped";
225+
}
226+
});
227+
228+
// Wait for the process to start listening on PORT
229+
await waitForPort(entry.port, 15_000);
230+
entry.status = "running";
231+
232+
logger.info(
233+
{ siteId: entry.siteId, domain: entry.siteDomain, port: entry.port, pid: entry.pid },
234+
"[process-manager] Process ready",
235+
);
236+
}
237+
238+
// ── Wait for a process to bind a port ─────────────────────────────────────────
239+
240+
function waitForPort(port: number, timeoutMs: number): Promise<void> {
241+
return new Promise((resolve, reject) => {
242+
const deadline = Date.now() + timeoutMs;
243+
244+
function probe() {
245+
const sock = new http.ClientRequest({ host: "127.0.0.1", port, path: "/", method: "HEAD" });
246+
sock.on("response", () => { sock.destroy(); resolve(); });
247+
sock.on("error", () => {
248+
if (Date.now() > deadline) {
249+
reject(new Error(`Process did not bind port ${port} within ${timeoutMs}ms`));
250+
} else {
251+
setTimeout(probe, 500);
252+
}
253+
});
254+
sock.end();
255+
}
256+
257+
probe();
258+
});
259+
}
260+
261+
// ── Public API ────────────────────────────────────────────────────────────────
262+
263+
export async function startSiteProcess(opts: {
264+
siteId: number;
265+
siteDomain: string;
266+
runtime: RuntimeType;
267+
workDir: string;
268+
entryFile: string;
269+
}): Promise<{ port: number }> {
270+
const existing = processes.get(opts.siteId);
271+
if (existing && (existing.status === "running" || existing.status === "starting")) {
272+
return { port: existing.port };
273+
}
274+
275+
// Validate that the entry file exists
276+
const entryPath = path.join(opts.workDir, opts.entryFile);
277+
if (!fs.existsSync(entryPath)) {
278+
throw new Error(`Entry file not found: ${entryPath}`);
279+
}
280+
281+
// Validate NLPL interpreter is available
282+
if (opts.runtime === "nlpl" && !fs.existsSync(NLPL_INTERPRETER)) {
283+
throw new Error(
284+
`NLPL interpreter not found at ${NLPL_INTERPRETER}. ` +
285+
`Set NLPL_INTERPRETER_PATH or run: git clone https://github.com/Zajfan/NLPL /opt/nlpl`,
286+
);
287+
}
288+
289+
const port = await findFreePort();
290+
291+
const entry: ProcessEntry = {
292+
siteId: opts.siteId,
293+
siteDomain: opts.siteDomain,
294+
port,
295+
runtime: opts.runtime,
296+
workDir: opts.workDir,
297+
entryFile: opts.entryFile,
298+
process: null,
299+
status: "starting",
300+
restartCount: 0,
301+
startedAt: null,
302+
lastCrashAt: null,
303+
pid: null,
304+
};
305+
306+
processes.set(opts.siteId, entry);
307+
308+
await spawnProcess(entry);
309+
310+
return { port };
311+
}
312+
313+
export function stopSiteProcess(siteId: number): void {
314+
const entry = processes.get(siteId);
315+
if (!entry) return;
316+
317+
logger.info({ siteId, domain: entry.siteDomain }, "[process-manager] Stopping process");
318+
319+
entry.status = "stopped";
320+
const proc = entry.process;
321+
if (proc) {
322+
entry.process = null;
323+
proc.kill("SIGTERM");
324+
// Force-kill after 5 seconds if it doesn't exit cleanly
325+
setTimeout(() => {
326+
try { proc.kill("SIGKILL"); } catch { /* already dead */ }
327+
}, 5_000);
328+
}
329+
330+
releasePort(entry.port);
331+
processes.delete(siteId);
332+
// Keep logs for 5 minutes after stop so the UI can still show them
333+
setTimeout(() => clearProcessLogs(siteId), 5 * 60_000);
334+
}
335+
336+
export function getSiteProcess(siteId: number): ProcessEntry | undefined {
337+
return processes.get(siteId);
338+
}
339+
340+
export function getSiteProxyTarget(siteId: number): string | null {
341+
const entry = processes.get(siteId);
342+
if (!entry || entry.status !== "running") return null;
343+
return `http://127.0.0.1:${entry.port}`;
344+
}
345+
346+
export function getAllProcessStats() {
347+
return [...processes.values()].map((e) => ({
348+
siteId: e.siteId,
349+
domain: e.siteDomain,
350+
runtime: e.runtime,
351+
port: e.port,
352+
status: e.status,
353+
pid: e.pid,
354+
restartCount: e.restartCount,
355+
startedAt: e.startedAt?.toISOString(),
356+
lastCrashAt: e.lastCrashAt?.toISOString(),
357+
}));
358+
}
359+
360+
export function stopAllProcesses(): void {
361+
for (const siteId of processes.keys()) {
362+
stopSiteProcess(siteId);
363+
}
364+
}

0 commit comments

Comments
 (0)