Skip to content

Commit edab297

Browse files
lewisnsmith and claude committed
Fix 3 bugs, 12 code review issues, and repo hygiene
Bug fixes (documented in docs/CHANGELOG.md):
- Fix close() flush fire-and-forget: async close() awaits flush, closeSync() with writeFileSync for signal handlers
- Fix hallucination heuristic concurrency: replace scalar state with ordered recentResponses[] array, add 30s time window
- Replace synchronous execSync('df') with async fs.statfs()

Code review fixes:
- SIGTERM/SIGINT handlers now call process.exit()
- flush() handles concurrent calls via flushPromise tracking
- Pending retries flushed to client on upstream exit
- tailSession uses line count instead of bytes/100 heuristic
- readLogEntries/listAlerts resilient to malformed JSONL lines
- Shell-safe quoting in generated claude mcp add-json commands
- Guard against missing error.message from upstream
- Retry setTimeout checks stream alive before writing
- tailSession uses process.once for SIGINT handler
- sessionId includes random suffix to prevent collisions

Repo hygiene:
- Move flight-prd.md and plan.md to docs/
- Create docs/CHANGELOG.md
- Add bench/throughput.ts benchmark script
- Update README: clone+build+link install, CI badge, actual benchmark numbers, remove AgentLens, update doc links
- Remove redundant setTimeout sleeps in tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 88ac208 commit edab297

13 files changed

Lines changed: 326 additions & 94 deletions

README.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
![CI](https://github.com/lewisnsmith/flight/actions/workflows/ci.yml/badge.svg)
2+
13
# Flight Proxy
24

35
**A local MCP flight recorder and research instrument for AI coding agents.**
@@ -42,8 +44,9 @@ The M3 experience exposed a structural problem: AI systems producing confident b
4244
## Quick Start
4345

4446
```bash
45-
# Install
46-
npm install -g flight-proxy
47+
# Install from source
48+
git clone https://github.com/lewisnsmith/flight.git
49+
cd flight && npm install && npm run build && npm link
4750

4851
# Discover your existing MCP servers and wrap them with Flight
4952
flight init claude
@@ -188,7 +191,7 @@ print(f"Calls: {len(entries)}, Errors: {len(errors)}, Hallucination hints: {len(
188191
## Performance
189192
190193
- **<5ms** added latency per tool call (streaming NDJSON, fire-and-forget log writes)
191-
- **4,000+ calls/sec** sustained throughput (benchmarked)
194+
- **40,000+ calls/sec** sustained throughput ([benchmarked](./bench/throughput.ts))
192195
- **Backpressure-aware:** proxy never accumulates unbounded in-memory buffers
193196
- **Disk-safe:** disables logging gracefully if free space drops below 100MB
194197
- **Write queue:** 1,000 entries max; drops with warning under disk pressure, never stalls the proxy
@@ -210,15 +213,15 @@ print(f"Calls: {len(entries)}, Errors: {len(errors)}, Hallucination hints: {len(
210213
| **Flight Proxy** |||| planned ||
211214
| Reticle ||||||
212215
| MCP Inspector | — (browser) |||||
213-
| AgentLens ||||||
214216
| Langfuse/Moesif ||| partial |||
215217
216218
---
217219
218220
## Install
219221
220222
```bash
221-
npm install -g flight-proxy
223+
git clone https://github.com/lewisnsmith/flight.git
224+
cd flight && npm install && npm run build && npm link
222225
flight init claude
223226
```
224227
@@ -228,8 +231,9 @@ Requires Node.js 20+. No database, no cloud, no external dependencies.
228231
229232
## Documentation
230233
231-
- [`flight-prd.md`](./flight-prd.md) — full product requirements document
232-
- [`plan.md`](./plan.md) — sprint plan and roadmap
234+
- [`docs/flight-prd.md`](./docs/flight-prd.md) — full product requirements document
235+
- [`docs/plan.md`](./docs/plan.md) — sprint plan and roadmap
236+
- [`docs/CHANGELOG.md`](./docs/CHANGELOG.md) — iteration history
233237
234238
---
235239

bench/throughput.ts

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/**
2+
* Flight Proxy throughput benchmark.
3+
*
4+
* Spawns the proxy with a mock MCP server and measures:
5+
* - Small calls: 1000 x ~1KB requests → calls/sec
6+
* - Large calls: 50 x ~100KB requests → MB/sec
7+
*
8+
* Usage: npx tsx bench/throughput.ts
9+
*/
10+
11+
import { spawn } from "node:child_process";
import { rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { dirname, join } from "node:path";
import { createInterface } from "node:readline";
import { fileURLToPath } from "node:url";
16+
17+
// Resolve sibling paths from import.meta.url rather than import.meta.dirname:
// the latter only exists on Node >= 20.11 / 21.2, and on older 20.x releases it
// is undefined, which makes join() throw before the benchmark even starts.
const BENCH_DIR = dirname(fileURLToPath(import.meta.url));
const MOCK_SERVER = join(BENCH_DIR, "..", "test", "mock-mcp-server.ts");
const PROXY_MODULE = join(BENCH_DIR, "..", "src", "proxy.ts");
19+
20+
/**
 * Spawns the Flight proxy (wrapping the mock MCP server) as a child process
 * and returns a small harness for driving it over stdio.
 *
 * @param logDir Directory passed to `startProxy` as its `logDir` option.
 * @returns An object with:
 *   - `send`: writes one newline-terminated JSON message to the child's stdin;
 *   - `waitForResponses`: resolves once at least `count` stdout lines have been
 *     seen in total, and rejects on timeout or as soon as the child fails;
 *   - `close`: ends stdin and kills the child;
 *   - `count` / `bytes`: running totals of stdout lines and their byte length
 *     (line terminators excluded).
 */
function createProxy(logDir: string) {
  // JSON.stringify produces a correctly escaped string literal, so a path that
  // contains quotes or other special characters cannot break the inline script.
  const asLiteral = (path: string) => JSON.stringify(path.replace(/\\/g, "/"));

  const script = `
    import { startProxy } from ${asLiteral(PROXY_MODULE)};
    startProxy({
      command: "npx",
      args: ["tsx", ${asLiteral(MOCK_SERVER)}],
      logDir: ${asLiteral(logDir)},
      quiet: true,
    });
  `;

  // stderr is inherited instead of piped: a piped stream that nobody reads can
  // fill the OS pipe buffer and block the child, and inheriting it also makes
  // startup failures visible instead of surfacing only as a timeout.
  const child = spawn("npx", ["tsx", "-e", script], { stdio: ["pipe", "pipe", "inherit"] });

  let responseCount = 0;
  let totalResponseBytes = 0;
  let failure: Error | null = null;
  let closing = false;

  // Remember the first unexpected failure so waiters can reject immediately
  // rather than polling until the timeout expires.
  const recordFailure = (err: Error) => {
    if (!closing && !failure) failure = err;
  };
  child.once("error", (err: Error) => recordFailure(err));
  child.once("close", (code: number | null, signal: string | null) => {
    recordFailure(new Error(`Proxy exited early (code ${code}, signal ${signal})`));
  });
  // Writing to a dead child emits EPIPE on stdin; without a listener that
  // would crash the benchmark with an unhandled 'error' event.
  child.stdin!.on("error", (err: Error) => recordFailure(err));

  const rl = createInterface({ input: child.stdout! });
  rl.on("line", (line: string) => {
    responseCount++;
    totalResponseBytes += Buffer.byteLength(line);
  });

  function send(msg: Record<string, unknown>) {
    child.stdin!.write(JSON.stringify(msg) + "\n");
  }

  function waitForResponses(count: number, timeoutMs = 60000): Promise<void> {
    return new Promise((resolve, reject) => {
      const start = Date.now();
      const check = () => {
        // Check the count first so responses that arrived before a failure still win.
        if (responseCount >= count) return resolve();
        if (failure) return reject(failure);
        if (Date.now() - start > timeoutMs) return reject(new Error(`Timeout: got ${responseCount}/${count}`));
        setTimeout(check, 10);
      };
      check();
    });
  }

  return {
    send,
    waitForResponses,
    close: () => {
      // Mark the shutdown as intentional so it is not reported as a failure.
      closing = true;
      child.stdin!.end();
      child.kill();
    },
    get count() { return responseCount; },
    get bytes() { return totalResponseBytes; },
  };
}
63+
64+
/**
 * Small-call benchmark: sends 1000 `tools/call` requests for `read_file`
 * through the proxy and measures how long it takes to receive all responses.
 *
 * The `initialize` handshake is completed before the timer starts, so only the
 * tool calls are timed.
 *
 * @returns The number of calls, the elapsed wall-clock time in milliseconds,
 *   and the derived calls-per-second rate (rounded).
 * @throws If the proxy fails or the responses do not arrive before the timeout.
 */
async function benchSmallCalls() {
  const logDir = join(tmpdir(), `flight-bench-small-${Date.now()}`);
  const proxy = createProxy(logDir);
  const CALL_COUNT = 1000;

  try {
    // Initialize
    proxy.send({
      jsonrpc: "2.0", id: 0, method: "initialize",
      params: { protocolVersion: "2024-11-05", capabilities: {}, clientInfo: { name: "bench", version: "1.0" } },
    });
    await proxy.waitForResponses(1);

    const start = Date.now();

    for (let i = 1; i <= CALL_COUNT; i++) {
      proxy.send({
        jsonrpc: "2.0", id: i, method: "tools/call",
        params: { name: "read_file", arguments: { path: `/file_${i}.ts` } },
      });
    }

    // +1 accounts for the initialize response that was already counted.
    await proxy.waitForResponses(CALL_COUNT + 1);
    const elapsed = Date.now() - start;
    const callsPerSec = Math.round(CALL_COUNT / (elapsed / 1000));

    return { callCount: CALL_COUNT, elapsed, callsPerSec };
  } finally {
    // Always tear down, even on timeout, so a failed run does not leak the
    // child process or leave the temporary log directory behind.
    proxy.close();
    await rm(logDir, { recursive: true }).catch(() => {});
  }
}
94+
95+
/**
 * Large-call benchmark: sends 50 `tools/call` requests for `list_dir` through
 * the proxy and measures response throughput in MB/sec.
 *
 * The `initialize` handshake is completed before the timer starts, and its
 * response bytes are excluded from the total so that only data received during
 * the timed window counts toward the rate.
 *
 * @returns The number of calls, the elapsed wall-clock time in milliseconds,
 *   the response volume in MB, and the derived MB/sec rate.
 * @throws If the proxy fails or the responses do not arrive before the timeout.
 */
async function benchLargeCalls() {
  const logDir = join(tmpdir(), `flight-bench-large-${Date.now()}`);
  const proxy = createProxy(logDir);
  const CALL_COUNT = 50;

  try {
    // Initialize
    proxy.send({
      jsonrpc: "2.0", id: 0, method: "initialize",
      params: { protocolVersion: "2024-11-05", capabilities: {}, clientInfo: { name: "bench", version: "1.0" } },
    });
    await proxy.waitForResponses(1);

    // Bytes received so far belong to the handshake, not the benchmark.
    const baselineBytes = proxy.bytes;
    const start = Date.now();

    for (let i = 1; i <= CALL_COUNT; i++) {
      proxy.send({
        jsonrpc: "2.0", id: i, method: "tools/call",
        params: { name: "list_dir", arguments: { path: `/dir_${i}` } },
      });
    }

    // +1 accounts for the initialize response that was already counted.
    await proxy.waitForResponses(CALL_COUNT + 1);
    const elapsed = Date.now() - start;
    const totalMB = (proxy.bytes - baselineBytes) / (1024 * 1024);
    const mbPerSec = totalMB / (elapsed / 1000);

    return { callCount: CALL_COUNT, elapsed, totalMB, mbPerSec };
  } finally {
    // Always tear down, even on timeout, so a failed run does not leak the
    // child process or leave the temporary log directory behind.
    proxy.close();
    await rm(logDir, { recursive: true }).catch(() => {});
  }
}
126+
127+
/**
 * Runs the small-call and large-call benchmarks one after the other, printing
 * each result as it completes and a two-line summary at the end.
 */
async function main() {
  const banner = ["Flight Proxy Throughput Benchmark", "=================================\n"];
  for (const text of banner) {
    console.log(text);
  }

  console.log("Running small-call benchmark (1000 x ~1KB)...");
  const smallResult = await benchSmallCalls();
  const smallRate = `${smallResult.callsPerSec} calls/sec`;
  console.log(` ${smallResult.callCount} calls in ${smallResult.elapsed}ms → ${smallRate}\n`);

  console.log("Running large-call benchmark (50 x ~100KB)...");
  const largeResult = await benchLargeCalls();
  const largeRate = `${largeResult.mbPerSec.toFixed(2)} MB/sec`;
  console.log(` ${largeResult.callCount} calls in ${largeResult.elapsed}ms → ${largeRate}\n`);

  const summary = [
    "Summary",
    "-------",
    ` Small calls: ${smallRate}`,
    ` Large calls: ${largeRate}`,
  ];
  for (const text of summary) {
    console.log(text);
  }
}
144+
145+
// Script entry point: report any benchmark failure and exit with a non-zero status.
main().catch((error) => {
  console.error("Benchmark failed:", error);
  process.exit(1);
});

docs/CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Changelog
2+
3+
## v0.1.1 — Bug Fixes
4+
5+
### Fixed: Session logs truncated on exit (close flush was fire-and-forget)
6+
7+
`close()` called `flush()` but never awaited it. Since `flush()` does async `appendFile`, the process could exit before the final batch wrote to disk — silently losing the tail of the session log.
8+
9+
- `close()` now returns `Promise<void>` and awaits `flush()`
10+
- Added `closeSync()` using `writeFileSync` for signal handler paths (SIGTERM/SIGINT) where async isn't possible
11+
- Updated all callers in `proxy.ts` to await `close()` or use `closeSync()` as appropriate
12+
13+
### Fixed: False hallucination hints under concurrent requests
14+
15+
The hallucination heuristic used single scalar variables (`lastResponseWasError`, `lastErrorMethod`, `lastErrorToolName`). If the client sent requests A and B concurrently, interleaved responses would corrupt the state — the next client request could falsely trigger a hallucination hint.
16+
17+
- Replaced scalar state with an ordered list of recent server responses (`recentResponses[]`)
18+
- When a client request arrives, checks only the most recent server response for the error-then-different-tool pattern
19+
- Correctly handles interleaved responses without false positives
20+
21+
### Fixed: Synchronous disk space check blocking startup
22+
23+
`createSessionLogger` used `execSync('df ...')` to check available disk space. On slow disks or network mounts this could add visible latency to proxy startup.
24+
25+
- Replaced `execSync` + shell `df` command with `fs.statfs()` (async, native Node.js API)
26+
- Cross-platform: works on Windows, unlike the previous `df`-based approach
27+
- No shell spawning overhead
File renamed without changes.
File renamed without changes.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"test:watch": "vitest",
1616
"lint": "eslint src/",
1717
"typecheck": "tsc --noEmit",
18+
"bench": "npx tsx bench/throughput.ts",
1819
"check": "npm run lint && npm run typecheck && npm run test",
1920
"prepublishOnly": "npm run build"
2021
},

src/init.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,11 @@ export async function initClaudeCode(options: ClaudeCodeInitOptions = {}): Promi
122122
const commands: string[] = [];
123123
for (const [name, server] of Object.entries(wrapped)) {
124124
const json = JSON.stringify(server);
125+
// Escape single quotes for shell safety: replace ' with '\''
126+
const escapedJson = json.replace(/'/g, "'\\''");
127+
const escapedName = name.replace(/'/g, "'\\''");
125128
const scopeFlag = scope === "project" ? " --scope project" : "";
126-
commands.push(`claude mcp add-json "${name}" '${json}'${scopeFlag}`);
129+
commands.push(`claude mcp add-json '${escapedName}' '${escapedJson}'${scopeFlag}`);
127130
}
128131

129132
// Also write snippet for reference

src/log-commands.ts

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,16 @@ function sessionIdFromFile(filename: string): string {
5353
async function readLogEntries(sessionFile: string): Promise<LogEntry[]> {
5454
const filePath = join(DEFAULT_LOG_DIR, sessionFile);
5555
const content = await readFile(filePath, "utf-8");
56-
return content
57-
.trim()
58-
.split("\n")
59-
.filter(Boolean)
60-
.map((line) => JSON.parse(line) as LogEntry);
56+
const entries: LogEntry[] = [];
57+
for (const line of content.trim().split("\n")) {
58+
if (!line) continue;
59+
try {
60+
entries.push(JSON.parse(line) as LogEntry);
61+
} catch {
62+
// Skip malformed lines (truncated writes, partial flushes)
63+
}
64+
}
65+
return entries;
6166
}
6267

6368
async function findSessionFile(sessionId?: string): Promise<string | null> {
@@ -138,16 +143,18 @@ export async function tailSession(sessionId?: string): Promise<void> {
138143
console.log(`${C.dim} Press Ctrl+C to stop${C.reset}\n`);
139144

140145
// Print existing entries
146+
let lastLineCount = 0;
141147
try {
142148
const entries = await readLogEntries(file);
143149
for (const entry of entries) {
144150
console.log(formatEntryLine(entry));
145151
}
152+
lastLineCount = entries.length;
146153
} catch {
147154
// File might be empty
148155
}
149156

150-
// Watch for new entries
157+
// Watch for new entries — track by line count, not byte size
151158
let lastSize = 0;
152159
try {
153160
const s = await stat(filePath);
@@ -164,21 +171,25 @@ export async function tailSession(sessionId?: string): Promise<void> {
164171
const content = await readFile(filePath, "utf-8");
165172
const lines = content.trim().split("\n").filter(Boolean);
166173

167-
// Only print new lines
168-
const allEntries = lines.map((l) => JSON.parse(l) as LogEntry);
169-
const newEntries = allEntries.slice(-Math.max(1, allEntries.length - Math.floor(lastSize / 100)));
170-
171-
for (const entry of newEntries) {
172-
console.log(formatEntryLine(entry));
174+
// Parse only genuinely new lines
175+
const newLines = lines.slice(lastLineCount);
176+
for (const line of newLines) {
177+
try {
178+
const entry = JSON.parse(line) as LogEntry;
179+
console.log(formatEntryLine(entry));
180+
} catch {
181+
// skip malformed lines during active writing
182+
}
173183
}
184+
lastLineCount = lines.length;
174185
lastSize = s.size;
175186
} catch {
176187
// ignore read errors during active writing
177188
}
178189
});
179190

180191
// Keep alive until Ctrl+C
181-
process.on("SIGINT", () => {
192+
process.once("SIGINT", () => {
182193
watcher.close();
183194
process.exit(0);
184195
});
@@ -283,11 +294,15 @@ export async function listAlerts(options: { limit?: number; session?: string } =
283294
return;
284295
}
285296

286-
let alerts = content
287-
.trim()
288-
.split("\n")
289-
.filter(Boolean)
290-
.map((line) => JSON.parse(line) as AlertEntry);
297+
let alerts: AlertEntry[] = [];
298+
for (const line of content.trim().split("\n")) {
299+
if (!line) continue;
300+
try {
301+
alerts.push(JSON.parse(line) as AlertEntry);
302+
} catch {
303+
// Skip malformed lines
304+
}
305+
}
291306

292307
if (options.session) {
293308
alerts = alerts.filter((a) => a.session_id.includes(options.session!));

0 commit comments

Comments
 (0)