@@ -18,6 +18,30 @@ import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types
// Poll cadences in milliseconds.
// NOTE(review): POLL_INTERVAL_MS is presumably the idle cadence and
// ACTIVE_POLL_INTERVAL_MS the faster cadence used while a query is being
// processed — confirm against the call sites.
const POLL_INTERVAL_MS = 1000;
const ACTIVE_POLL_INTERVAL_MS = 500;
2020
/**
 * Number of corruption-class errors (any message accepted by
 * `isCorruptionError`, not only `database disk image is malformed`) the
 * follow-up poll tolerates in a row before it gives up and exits the process.
 *
 * At ACTIVE_POLL_INTERVAL_MS = 500ms this is roughly 5 seconds — long enough
 * to dodge a transient torn read during a host write, short enough to recover
 * quickly from a poisoned page cache (host-sweep then respawns with a fresh
 * mount).
 *
 * NOTE(review): the follow-up poll's catch block resets the streak only when
 * a non-corruption error is seen; confirm that a successful poll also resets
 * it, otherwise the count is cumulative rather than strictly consecutive.
 */
const CORRUPTION_STREAK_EXIT = 10;
29+
/**
 * Classifies an error message as a SQLite "corrupt read view" failure.
 *
 * The match is a case-sensitive substring test against three markers:
 * `database disk image is malformed`, `SQLITE_CORRUPT`, and
 * `file is not a database`.
 *
 * Such errors are almost always a cross-mount page-cache coherency issue on
 * Docker Desktop macOS rather than actual file damage (host-side
 * integrity_check passes). Reopening the DB handle inside this process does
 * NOT recover; only a fresh container mount does. This function only
 * classifies — it is the caller's job to exit so host-sweep respawns the
 * container.
 *
 * @param msg - Error message text to inspect.
 * @returns `true` if `msg` contains any of the corruption markers.
 */
export function isCorruptionError(msg: string): boolean {
  return (
    msg.includes('database disk image is malformed') ||
    msg.includes('SQLITE_CORRUPT') ||
    msg.includes('file is not a database')
  );
}
44+
/**
 * Writes a diagnostic line to stderr, tagged with the `[poll-loop]` prefix.
 *
 * @param msg - Text to emit after the prefix.
 */
function log(msg: string): void {
  console.error(`[poll-loop] ${msg}`);
}
@@ -291,6 +315,7 @@ async function processQuery(
291315 // will kill the container and messages get reset to pending.
292316 let pollInFlight = false ;
293317 let endedForCommand = false ;
318+ let corruptionStreak = 0 ;
294319 const pollHandle = setInterval ( ( ) => {
295320 if ( done || pollInFlight || endedForCommand ) return ;
296321 pollInFlight = true ;
@@ -362,6 +387,31 @@ async function processQuery(
362387 // path is not, so it needs its own.
363388 const errMsg = err instanceof Error ? err . message : String ( err ) ;
364389 log ( `Follow-up poll error: ${ errMsg } ` ) ;
390+
391+ // Detect SQLite cross-mount corruption (Docker Desktop macOS virtiofs /
392+ // gRPC-FUSE coherency bug — the kernel page cache for the inbound.db
393+ // bind mount can latch a torn snapshot mid-host-write, after which
394+ // every fresh openInboundDb() in this process sees the same broken
395+ // view. Reopening inside the container does NOT recover; only a fresh
396+ // container mount does. Exit so the host sweep respawns us.
397+ if ( isCorruptionError ( errMsg ) ) {
398+ corruptionStreak += 1 ;
399+ if ( corruptionStreak >= CORRUPTION_STREAK_EXIT ) {
400+ log (
401+ `Follow-up poll: ${ corruptionStreak } consecutive '${ errMsg } ' errors — ` +
402+ `inbound.db page cache is poisoned. Exiting so host respawns with a fresh mount.` ,
403+ ) ;
404+ // Stop touching the heartbeat so host-sweep stale detection fires
405+ // promptly even if exit() races with in-flight async work.
406+ done = true ;
407+ clearInterval ( pollHandle ) ;
408+ // Defer exit one tick so this log line flushes through Docker's
409+ // log driver before the process dies.
410+ setTimeout ( ( ) => process . exit ( 75 ) , 100 ) ;
411+ }
412+ } else {
413+ corruptionStreak = 0 ;
414+ }
365415 } finally {
366416 pollInFlight = false ;
367417 }
0 commit comments