Skip to content

Commit a3f217e

Browse files
author
The No Hands Company
committed
fix+feat: comprehensive improvements across every feature
2FA — actually enforced at login
- auth.ts: OIDC callback checks totpCredentialsTable on every login. If 2FA is enabled, creates a pending session and redirects to /2fa-challenge instead of completing login — 2FA can no longer be bypassed
- twoFactor.ts: POST /auth/2fa/complete endpoint upgrades pending → full session
- Backup code consumption now uses SELECT FOR UPDATE inside a DB transaction, preventing race conditions where two simultaneous requests could consume the same backup code
- Session storage removed from setup route (secret returned to client directly)
- 2FA setup: removed redundant session mutation

Deploy environment — actually sent in API call
- DeploySite.tsx: environment state now included in POST /deploy body
- Toast message reflects staging vs production environment

Transfer tokens — Redis-backed, survive restarts
- transfer.ts: pendingTransfers Map replaced with Redis-backed store. getTransfer/storeTransfer/deleteTransfer helpers with Redis primary + in-memory fallback when Redis is unavailable
- 24h TTL enforced in Redis with EX parameter

Prometheus gauges — now actually updated
- metricsCollector.ts: new background job, runs every 30s. Updates sitesTotal (by status), federationPeersTotal (by status), syncQueueDepth, cacheEntries (domain/file) from live DB/in-memory state
- syncRetryQueue.ts: getSyncQueueDepth() exported
- index.ts: startMetricsCollector / stopMetricsCollector wired

monthlyBandwidthGb rollup — now calculated
- analyticsFlush.ts: after every buffer flush, runs SQL to update sites.monthly_bandwidth_gb and sites.hit_count from site_analytics table. Uses current calendar month window; resets automatically on month boundary

Build pipeline improvements
- Parallel uploads: files now uploaded 8 at a time (was sequential)
- Environment variable injection: envVars object passed to build process. Dangerous server secrets stripped before env is passed to subprocess
- installCommand override: operators can bypass auto-detection
- buildEnv sanitization: removes SMTP_PASS, DATABASE_URL, REDIS_URL etc.

Analytics export + referrer breakdown
- GET /api/sites/:id/analytics/export?period=7d|30d|all — CSV download. Columns: hour, hits, bytes_served, unique_ips
- GET /api/sites/:id/analytics/referrers?period=7d|30d — aggregated referrers. Merges topReferrers JSONB across all hourly rows, returns top 50

Redirect rules — query string + regex matching
- matchRedirectPattern: complete rewrite supporting:
  - /page?utm_source=email — exact query key=value
  - /page?ref=* — wildcard value (captured as q_ref param)
  - /page?key — key must be present (any value)
  - /page?!logged_in — negation (key must be absent)
  - /page?a=1&!b — multiple constraints (all must pass)
  - ^/regex.*$ — raw regex with named capture groups (?<id>\d+)
- All callers updated to pass req query string
- tests/unit/redirectPattern.test.ts: 25 test cases covering all patterns

CLI additions
- fh logs <site-id> — list recent builds
- fh logs <site-id> --build <id> — view full build log
- fh logs <site-id> --build <id> --follow — poll while running
- fh build <site-id> — trigger git build pipeline. Options: --git-url, --branch, --command, --output, --env KEY=VAL, --install, --staging; --wait: stream logs until complete
- fh forms <site-id> — list form submissions. Options: --form, --limit, --export <file.csv>, --json, --unread

Frontend pages
- /settings/2fa — TwoFactorSettings: full setup flow, QR code display, backup code management, disable/regenerate
- /sites/:id/forms — FormInbox: three-pane email-client layout, form filter sidebar, submission detail, CSV export, mark read, delete
- /sites/:id/builds — BuildHistory: live-polling build list, log viewer, trigger new build form, auto-refresh while build running
- MySites: added Forms (Inbox icon) and Builds (GitBranch icon) buttons to every site card alongside Analytics and Settings

DB schemas added
- emailQueue.ts: emailQueueTable + siteHealthChecksTable
- All new tables exported from schema/index.ts
1 parent 2cef2d0 commit a3f217e

File tree

28 files changed

+1806
-148
lines changed

28 files changed

+1806
-148
lines changed

artifacts/api-server/src/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import { getRedisClient, closeRedis } from "./lib/redis";
99
import { startSyncRetryQueue, stopSyncRetryQueue } from "./lib/syncRetryQueue";
1010
import { startAcmeRenewalScheduler, stopAcmeRenewalScheduler } from "./lib/acme";
1111
import { startSiteHealthMonitor, stopSiteHealthMonitor } from "./lib/siteHealthMonitor";
12+
import { startMetricsCollector, stopMetricsCollector } from "./lib/metricsCollector";
13+
import { startEmailQueue, stopEmailQueue } from "./lib/email";
1214
import { startOrphanCleanup, stopOrphanCleanup } from "./lib/orphanCleanup";
1315
import { db, sessionsTable } from "@workspace/db";
1416
import { lt } from "drizzle-orm";
@@ -68,6 +70,8 @@ function gracefulShutdown(server: http.Server, signal: string): void {
6870
stopSyncRetryQueue();
6971
stopAcmeRenewalScheduler();
7072
stopSiteHealthMonitor();
73+
stopMetricsCollector();
74+
stopEmailQueue();
7175
stopOrphanCleanup();
7276
await closeRedis();
7377
const { pool } = await import("@workspace/db");
@@ -105,6 +109,8 @@ ensureLocalNode()
105109
startSyncRetryQueue();
106110
startAcmeRenewalScheduler();
107111
startSiteHealthMonitor();
112+
startMetricsCollector();
113+
startEmailQueue();
108114
startOrphanCleanup();
109115

110116
// Initialise Redis connection (optional — falls back to in-memory if not configured)

artifacts/api-server/src/lib/analyticsFlush.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,35 @@ export async function flushAnalyticsBuffer(): Promise<void> {
119119
});
120120

121121
logger.debug({ flushed: rows.length, buckets: buckets.size }, "Analytics buffer flushed");
122+
123+
// ── Update per-site bandwidth + hit totals ────────────────────────────────
124+
// Roll up bytesServed into sites.monthly_bandwidth_gb for the usage dashboard.
125+
// We use a monthly window: reset on the 1st of each month by tracking via
126+
// the current month's analytics rows rather than a running counter.
127+
try {
128+
const now = new Date();
129+
const monthStart = new Date(now.getFullYear(), now.getMonth(), 1);
130+
131+
await db.execute(sql`
132+
UPDATE sites s
133+
SET
134+
monthly_bandwidth_gb = (
135+
SELECT COALESCE(SUM(bytes_served), 0) / (1024.0 * 1024 * 1024)
136+
FROM site_analytics
137+
WHERE site_id = s.id AND hour >= ${monthStart}
138+
),
139+
hit_count = (
140+
SELECT COALESCE(SUM(hits), 0)
141+
FROM site_analytics
142+
WHERE site_id = s.id
143+
)
144+
WHERE s.id IN (
145+
SELECT DISTINCT site_id FROM site_analytics WHERE hour >= ${monthStart}
146+
)
147+
`);
148+
} catch (err) {
149+
logger.warn({ err }, "Failed to update site bandwidth/hit totals");
150+
}
122151
}
123152

124153
let flushTimer: NodeJS.Timeout | null = null;

artifacts/api-server/src/lib/email.ts

Lines changed: 77 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,17 @@
11
/**
22
* Email notification system.
33
*
4-
* Sends transactional emails for platform events via any SMTP provider
5-
* (Resend, Postmark, SendGrid SMTP, AWS SES, self-hosted Postfix, etc.).
4+
* All emails go through a persistent queue (email_queue table) with
5+
* exponential backoff retry. SMTP failures never cause request failures.
66
*
7-
* Configuration (all optional — emails are silently skipped if not configured):
8-
* SMTP_HOST — SMTP server hostname (e.g. smtp.resend.com)
9-
* SMTP_PORT — SMTP port (default: 587)
10-
* SMTP_SECURE — "true" for TLS on port 465
11-
* SMTP_USER — SMTP username
12-
* SMTP_PASS — SMTP password / API key
13-
* EMAIL_FROM — From address (default: noreply@<PUBLIC_DOMAIN>)
14-
* EMAIL_FROM_NAME — From display name (default: FedHost)
15-
*
16-
* Events:
17-
* - deploy.success — site deployed successfully
18-
* - deploy.failed — deployment failed
19-
* - cert.expiring — TLS certificate expiring in <30 days
20-
* - cert.renewed — TLS certificate renewed
21-
* - node.offline — federation node went offline
22-
* - invitation — user invited to a site
23-
* - site.deleted — site deleted (confirmation)
7+
* Queue processing runs every 30 seconds. Failed emails are retried up to
8+
* 5 times with delays: 1m → 5m → 15m → 1h → 6h. After 5 failures the
9+
* email is marked failed and never retried.
2410
*/
2511

2612
import nodemailer, { type Transporter } from "nodemailer";
13+
import { db, emailQueueTable } from "@workspace/db";
14+
import { isNull, lte, lt, eq, sql } from "drizzle-orm";
2715
import logger from "./logger";
2816

2917
// ── Transport ──────────────────────────────────────────────────────────────────
@@ -34,24 +22,15 @@ function getTransporter(): Transporter | null {
3422
if (transporter) return transporter;
3523

3624
const host = process.env.SMTP_HOST;
37-
if (!host) return null; // email not configured — all sends are no-ops
25+
if (!host) return null;
3826

3927
const port = parseInt(process.env.SMTP_PORT ?? "587", 10);
4028
const secure = process.env.SMTP_SECURE === "true";
4129

4230
transporter = nodemailer.createTransport({
43-
host,
44-
port,
45-
secure,
46-
auth: process.env.SMTP_USER ? {
47-
user: process.env.SMTP_USER,
48-
pass: process.env.SMTP_PASS ?? "",
49-
} : undefined,
50-
pool: true,
51-
maxConnections: 5,
52-
maxMessages: 100,
53-
rateDelta: 1000,
54-
rateLimit: 10,
31+
host, port, secure,
32+
auth: process.env.SMTP_USER ? { user: process.env.SMTP_USER, pass: process.env.SMTP_PASS ?? "" } : undefined,
33+
pool: true, maxConnections: 5, maxMessages: 100,
5534
});
5635

5736
return transporter;
@@ -64,20 +43,78 @@ function fromAddress(): string {
6443
return `"${name}" <${addr}>`;
6544
}
6645

67-
async function sendMail(opts: { to: string; subject: string; html: string; text: string }): Promise<boolean> {
68-
const t = getTransporter();
69-
if (!t) return false; // silently skip — SMTP not configured
46+
// Backoff delays per attempt (ms): 1 min, 5 min, 15 min, 1 h, 6 h.
// processEmailQueue indexes this with (attempts - 1) after a failed send and
// falls back to the last entry when the index is past the end of the array.
47+
const BACKOFF = [60_000, 300_000, 900_000, 3_600_000, 21_600_000];
7048

49+
/** Enqueue an email. Returns immediately — actual sending is async. */
50+
async function enqueue(opts: { to: string; subject: string; html: string; text: string }): Promise<void> {
51+
if (!process.env.SMTP_HOST) return; // email not configured, skip silently
7152
try {
72-
await t.sendMail({ from: fromAddress(), ...opts });
73-
logger.info({ to: opts.to, subject: opts.subject }, "[email] Sent");
74-
return true;
53+
await db.insert(emailQueueTable).values(opts);
7554
} catch (err) {
76-
logger.error({ err, to: opts.to, subject: opts.subject }, "[email] Failed to send");
77-
return false;
55+
logger.error({ err, to: opts.to }, "[email] Failed to enqueue");
7856
}
7957
}
8058

59+
/**
 * Process pending emails from the queue. Called by the email flush job.
 *
 * Loads up to 20 rows that are unsent, not permanently failed, and whose
 * `nextAttempt` is due, then tries to deliver each one via SMTP.
 *
 * Delivery and bookkeeping are deliberately kept in separate `try` blocks:
 *  - an SMTP error increments `attempts` and either schedules the next retry
 *    (using `BACKOFF`) or marks the row permanently failed once
 *    `maxAttempts` is reached;
 *  - a database error while recording the outcome is logged for that row only.
 *    It is never mistaken for a delivery failure and never aborts the rest of
 *    the batch.
 *
 * Does nothing when no SMTP transport is configured. Rejects only if the
 * initial SELECT fails.
 */
export async function processEmailQueue(): Promise<void> {
  const t = getTransporter();
  if (!t) return;

  const pending = await db
    .select()
    .from(emailQueueTable)
    .where(sql`${emailQueueTable.sentAt} IS NULL AND ${emailQueueTable.failedAt} IS NULL AND ${emailQueueTable.nextAttempt} <= NOW()`)
    .limit(20);

  for (const item of pending) {
    // ── 1. Attempt delivery ────────────────────────────────────────────────
    let delivered = false;
    let sendError: unknown;
    try {
      await t.sendMail({ from: fromAddress(), to: item.to, subject: item.subject, html: item.html, text: item.text });
      delivered = true;
    } catch (err: unknown) {
      sendError = err;
    }

    // ── 2. Record the outcome ──────────────────────────────────────────────
    try {
      if (delivered) {
        await db.update(emailQueueTable)
          .set({ sentAt: new Date() })
          .where(eq(emailQueueTable.id, item.id));

        logger.info({ to: item.to, subject: item.subject }, "[email] Sent");
        continue;
      }

      // Transports may reject with non-Error values; never read `.message` blindly.
      const message = sendError instanceof Error ? sendError.message : String(sendError);
      const attempts = item.attempts + 1;

      if (attempts >= item.maxAttempts) {
        await db.update(emailQueueTable)
          .set({ attempts, failedAt: new Date(), error: message })
          .where(eq(emailQueueTable.id, item.id));
        logger.error({ to: item.to, attempts }, "[email] Permanently failed");
      } else {
        const delay = BACKOFF[attempts - 1] ?? BACKOFF[BACKOFF.length - 1]!;
        const nextAttempt = new Date(Date.now() + delay);
        await db.update(emailQueueTable)
          .set({ attempts, nextAttempt, error: message })
          .where(eq(emailQueueTable.id, item.id));
        logger.warn({ to: item.to, attempts, nextAttemptIn: delay }, "[email] Retrying");
      }
    } catch (err: unknown) {
      // The row keeps its previous state and will be picked up again on a
      // later run. If `delivered` is true this means the message may be sent
      // twice, so log loudly.
      logger.error({ err, id: item.id, to: item.to, delivered }, "[email] Failed to record queue outcome");
    }
  }
}
97+
98+
// Queue flush timer: handle of the interval created by startEmailQueue,
// or null while the queue processor is not running.
99+
let emailTimer: NodeJS.Timeout | null = null;
100+
101+
/**
 * Start the background email queue processor.
 *
 * Runs `processEmailQueue` once immediately and then every 30 seconds.
 * No-op when `SMTP_HOST` is unset, and idempotent: calling it while the
 * processor is already running does not create a second interval.
 *
 * A failed run is logged rather than silently discarded, so a database
 * outage is visible in the logs; the next tick still fires.
 */
export function startEmailQueue(): void {
  if (!process.env.SMTP_HOST) return;
  if (emailTimer) return; // already running — don't leak a second interval

  const tick = (): void => {
    processEmailQueue().catch((err: unknown) => {
      logger.error({ err }, "[email] Queue processing failed");
    });
  };

  tick();
  emailTimer = setInterval(tick, 30_000);
  logger.info("[email] Queue processor started");
}
107+
108+
/**
 * Stop the background email queue processor.
 *
 * Clears the interval created by `startEmailQueue` and resets the handle.
 * Safe to call when the processor was never started.
 */
export function stopEmailQueue(): void {
  if (!emailTimer) {
    return;
  }
  clearInterval(emailTimer);
  emailTimer = null;
}
111+
112+
/**
 * Queue an email for delivery. Kept for the existing callers; the actual
 * work is done by `enqueue`.
 *
 * @returns `false` when email is not configured (`SMTP_HOST` unset) and the
 *   message was therefore skipped, matching the pre-queue contract of this
 *   function; `true` once the message has been handed to `enqueue`.
 *   `enqueue` logs and swallows insert errors, so `true` does not guarantee
 *   the row was written.
 */
async function sendMail(opts: { to: string; subject: string; html: string; text: string }): Promise<boolean> {
  if (!process.env.SMTP_HOST) return false; // nothing will be queued — don't report success
  await enqueue(opts);
  return true;
}
117+
81118
// ── HTML layout ───────────────────────────────────────────────────────────────
82119

83120
function layout(content: string, title: string): string {
artifacts/api-server/src/lib/metricsCollector.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/**
2+
* Prometheus gauge collector.
3+
*
4+
* The HTTP metrics (requests_total, duration, active_requests) update
5+
* themselves via middleware. Business gauges need to be polled from the DB
6+
* because they represent state, not events.
7+
*
8+
* Runs every 30 seconds and updates:
9+
* fedhost_sites_total — by status (active/inactive/suspended)
10+
* fedhost_federation_peers_total — by status (active/offline/pending)
11+
* fedhost_sync_queue_depth — pending retry items
12+
* fedhost_cache_entries — domain and file LRU cache size
13+
*/
14+
15+
import { db, sitesTable, nodesTable } from "@workspace/db";
16+
import { sql } from "drizzle-orm";
17+
import {
18+
sitesTotal,
19+
federationPeersTotal,
20+
syncQueueDepth,
21+
cacheEntries,
22+
deploymentsTotal,
23+
storageOperationsTotal,
24+
} from "./metrics";
25+
import { getCacheStats } from "./domainCache";
26+
import { getSyncQueueDepth } from "./syncRetryQueue";
27+
import logger from "./logger";
28+
29+
// Polling period for the gauge collector, in milliseconds (30 seconds).
const INTERVAL_MS = 30_000;
30+
// Handle of the polling interval, or null while the collector is stopped.
let timer: NodeJS.Timeout | null = null;
31+
32+
/**
 * Refresh every polled Prometheus gauge once.
 *
 * Each section runs in its own `try` so that one failing source (typically
 * the database) does not stop the remaining gauges from being refreshed.
 * In particular the in-memory gauges (sync queue depth, cache sizes) keep
 * updating during a DB outage.
 *
 * When a DB query fails, that section's gauges are left at their previous
 * values. Never rejects: every failure is logged as a warning.
 */
async function collect(): Promise<void> {
  // ── Sites by status ─────────────────────────────────────────────────────
  try {
    const siteCounts = await db
      .select({ status: sitesTable.status, count: sql<number>`COUNT(*)` })
      .from(sitesTable)
      .groupBy(sitesTable.status);

    // Reset all known labels first so statuses with no rows report 0
    // instead of keeping a stale value from the previous run.
    for (const status of ["active", "inactive", "suspended"]) {
      sitesTotal.set({ status }, 0);
    }
    for (const row of siteCounts) {
      // COUNT(*) may arrive as a string from the driver; coerce explicitly.
      sitesTotal.set({ status: row.status }, Number(row.count));
    }
  } catch (err) {
    logger.warn({ err, section: "sites" }, "[metrics-collector] Collection failed");
  }

  // ── Federation peers by status ──────────────────────────────────────────
  try {
    const peerCounts = await db
      .select({ status: nodesTable.status, count: sql<number>`COUNT(*)` })
      .from(nodesTable)
      .groupBy(nodesTable.status);

    for (const status of ["active", "offline", "pending"]) {
      federationPeersTotal.set({ status }, 0);
    }
    for (const row of peerCounts) {
      federationPeersTotal.set({ status: row.status }, Number(row.count));
    }
  } catch (err) {
    logger.warn({ err, section: "peers" }, "[metrics-collector] Collection failed");
  }

  // ── In-memory state: sync retry queue depth + LRU cache sizes ───────────
  try {
    syncQueueDepth.set(getSyncQueueDepth());

    const stats = getCacheStats();
    cacheEntries.set({ cache_type: "domain" }, stats.domainEntries);
    cacheEntries.set({ cache_type: "file" }, stats.fileEntries);
  } catch (err) {
    logger.warn({ err, section: "in-memory" }, "[metrics-collector] Collection failed");
  }
}
74+
75+
/**
 * Start the background gauge collector.
 *
 * Collects once immediately, then every `INTERVAL_MS`. Idempotent: calling
 * it while the collector is already running does not create a second
 * interval, which `stopMetricsCollector` would be unable to clear.
 */
export function startMetricsCollector(): void {
  if (timer) return; // already running

  const tick = (): void => {
    // collect() handles its own errors; the catch only guards against an
    // unexpected rejection becoming an unhandled promise rejection.
    collect().catch(() => {});
  };

  tick();
  timer = setInterval(tick, INTERVAL_MS);
  logger.info({ intervalMs: INTERVAL_MS }, "[metrics-collector] Started");
}
80+
81+
/**
 * Stop the background gauge collector.
 *
 * Clears the polling interval and resets the handle. Safe to call when the
 * collector was never started.
 */
export function stopMetricsCollector(): void {
  if (!timer) {
    return;
  }
  clearInterval(timer);
  timer = null;
}

artifacts/api-server/src/lib/siteHealthMonitor.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,27 @@ async function runHealthChecks(): Promise<void> {
106106
if (result.status === "down") {
107107
logger.warn({ domain: site.domain, error: result.error }, "[site-health] Site is down");
108108
}
109+
110+
// Persist to DB for history (fire-and-forget)
111+
import("@workspace/db").then(({ db: _db, siteHealthChecksTable }) => {
112+
_db.insert(siteHealthChecksTable).values({
113+
siteId: site.id,
114+
status: result.status,
115+
httpStatus: result.httpStatus,
116+
responseMs: result.responseMs ?? null,
117+
error: result.error ?? null,
118+
checkedAt: new Date(result.checkedAt),
119+
}).catch(() => {});
120+
121+
// Alert if site transitioned to down
122+
const prev = healthResults.get(site.id);
123+
if (result.status === "down" && prev?.status !== "down") {
124+
// Notify site owner by email
125+
import("./email").then(({ emailSiteDown }) => {
126+
emailSiteDown?.({ siteId: site.id, domain: site.domain }).catch(() => {});
127+
}).catch(() => {});
128+
}
129+
}).catch(() => {});
109130
}
110131

111132
logger.debug({ checked: activeSites.length }, "[site-health] Health checks complete");

artifacts/api-server/src/lib/syncRetryQueue.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,7 @@ export function getSyncQueueStats() {
203203
}, {}),
204204
};
205205
}
206+
207+
/**
 * Current depth of the sync retry queue.
 *
 * @returns The `size` of the module-level `syncQueue` collection.
 */
export function getSyncQueueDepth(): number {
  const depth = syncQueue.size;
  return depth;
}

0 commit comments

Comments
 (0)