Skip to content

Commit 8fbc5e7

Browse files
Merge pull request #193 from alanshurafa/contrib/alanshurafa/brain-backup
[recipes] Brain backup and export
2 parents f15cff6 + 3c9b598 commit 8fbc5e7

2 files changed

Lines changed: 163 additions & 46 deletions

File tree

recipes/brain-backup/README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Brain Backup and Export
22

3+
<div align="center">
4+
5+
![Community Contribution](https://img.shields.io/badge/OB1_COMMUNITY-Approved_Contribution-2ea44f?style=for-the-badge&logo=github)
6+
7+
**Created by [@alanshurafa](https://github.com/alanshurafa)**
8+
9+
</div>
10+
311
Export all Open Brain Supabase tables to local JSON files. The script paginates through PostgREST (1 000 rows per request), writes each table to a dated JSON file, and prints a summary.
412

513
## Prerequisites
@@ -31,10 +39,27 @@ Export all Open Brain Supabase tables to local JSON files. The script paginates
3139

3240
## Expected Result
3341

34-
After running the script you will have a `backup/` directory containing dated JSON exports of every Open Brain table (thoughts, entities, edges, thought_entities, reflections, ingestion_jobs, ingestion_items). The console output shows row counts and file sizes for each table, making it easy to verify the backup is complete.
42+
After running the script you will have a `backup/` directory containing dated JSON exports of every Open Brain table present in your project.
43+
44+
- `thoughts` is always backed up (required).
45+
- Optional companion tables — `entities`, `edges`, `thought_entities`, `ingestion_jobs`, `ingestion_items` — are backed up only if they exist. They ship with companion contributions (e.g. the entity-extraction and smart-ingest schemas). Stock Open Brain installs will see `skipped (table not present)` for those, which is expected.
46+
47+
The console output shows row counts and file sizes for each table, making it easy to verify the backup is complete.
3548

3649
## Tips
3750

3851
- Schedule the script with cron or Task Scheduler for automatic daily backups.
3952
- Commit the `backup/` directory to a private repo for versioned history.
4053
- The script streams rows to disk, so it handles large tables without running out of memory.
54+
55+
## Troubleshooting
56+
57+
- **`PostgREST error 404 on thoughts`** -- the script reached a server, but the request for the `thoughts` table came back 404 without PostgREST's "schema cache" error code. Only a 404 whose body carries `code: "PGRST205"` is treated as "table not present"; any other 404 is surfaced as an error so you can diagnose it. Common causes:
58+
- Typo in `SUPABASE_URL` (for example pointing at `/v1` instead of the project root -- the script appends `/rest/v1` itself).
59+
- Supabase project is paused or deleted.
60+
- The `thoughts` table lives in a non-`public` schema that PostgREST isn't exposing.
61+
- You're using the `anon` key instead of the `service_role` key. The anon key can be restricted by RLS and return empty or 404 responses; service-role keys bypass RLS.
62+
- **`skipped (table not present)` for optional tables** -- expected on stock Open Brain installs. The optional tables ship with companion contributions (entity extraction, smart ingest).
63+
- **`PostgREST error 401`** -- `SUPABASE_SERVICE_ROLE_KEY` is wrong, revoked, or truncated.
64+
- **`PostgREST error 403`** -- unusual for service-role keys, which should bypass RLS. Double-check you're not using a custom-minted JWT with narrower claims.
65+
- **Script hangs or aborts after ~60s** -- each request is aborted after 60 seconds by default and reported as `timed out after 60000 ms`. Set `FETCH_TIMEOUT_MS` to a larger value (in milliseconds) if your project is on a slow tier or has very large tables.

recipes/brain-backup/backup-brain.mjs

Lines changed: 137 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@ const SCRIPT_DIR = process.cwd();
2323

2424
const PAGE_SIZE = 1000;
2525

26+
// Stock Open Brain only has `thoughts`. The other tables are from optional
27+
// companion contributions (entity extraction, smart ingest). Missing tables
28+
// are skipped at runtime so this recipe works against any Open Brain install.
2629
const TABLES = [
27-
{ name: "thoughts", orderBy: "id" },
28-
{ name: "entities", orderBy: "id" },
29-
{ name: "edges", orderBy: "id" },
30-
{ name: "thought_entities", orderBy: "thought_id,entity_id" },
31-
{ name: "reflections", orderBy: "id" },
32-
{ name: "ingestion_jobs", orderBy: "id" },
33-
{ name: "ingestion_items", orderBy: "id" },
30+
{ name: "thoughts", orderBy: "id", required: true },
31+
{ name: "entities", orderBy: "id", required: false },
32+
{ name: "edges", orderBy: "id", required: false },
33+
{ name: "thought_entities", orderBy: "thought_id,entity_id", required: false },
34+
{ name: "ingestion_jobs", orderBy: "id", required: false },
35+
{ name: "ingestion_items", orderBy: "id", required: false },
3436
];
3537

3638
// ---------------------------------------------------------------------------
@@ -41,7 +43,14 @@ function loadEnvFile() {
4143
const envPath = path.join(SCRIPT_DIR, ".env.local");
4244
const vars = {};
4345
if (fs.existsSync(envPath)) {
44-
for (const line of fs.readFileSync(envPath, "utf8").split("\n")) {
46+
let isFirstLine = true;
47+
for (let line of fs.readFileSync(envPath, "utf8").split("\n")) {
48+
// Strip UTF-8 BOM from the first line -- Notepad and some VS Code
49+
// configurations on Windows write it, which would otherwise poison
50+
// the first key name (e.g. "\uFEFFSUPABASE_URL") and cause a
51+
// confusing "SUPABASE_URL not found" even though it's right there.
52+
if (isFirstLine && line.charCodeAt(0) === 0xFEFF) line = line.slice(1);
53+
isFirstLine = false;
4554
const trimmed = line.trim();
4655
if (!trimmed || trimmed.startsWith("#")) continue;
4756
const eqIdx = trimmed.indexOf("=");
@@ -90,6 +99,19 @@ const HEADERS = {
9099
Prefer: "count=exact",
91100
};
92101

102+
// Bounded per-request timeout. Unattended backup jobs must either finish or
103+
// fail within a predictable window -- a hung connection should not keep a
104+
// cron job alive forever. 60s is generous for a 1000-row page; override with
105+
// FETCH_TIMEOUT_MS for slow tiers or very large tables.
106+
const FETCH_TIMEOUT_MS = (() => {
107+
const raw =
108+
process.env.FETCH_TIMEOUT_MS ||
109+
envVars.FETCH_TIMEOUT_MS ||
110+
"";
111+
const parsed = parseInt(raw, 10);
112+
return Number.isFinite(parsed) && parsed > 0 ? parsed : 60_000;
113+
})();
114+
93115
// ---------------------------------------------------------------------------
94116
// Helpers
95117
// ---------------------------------------------------------------------------
@@ -101,19 +123,54 @@ function today() {
101123
function humanSize(bytes) {
102124
if (bytes < 1024) return `${bytes} B`;
103125
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
104-
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
126+
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
127+
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
105128
}
106129

107130
/** Fetch a single page of rows from a table. */
108131
async function fetchPage(table, orderBy, offset, limit) {
109132
const url = `${REST_BASE}/${table}?order=${orderBy}&limit=${limit}&offset=${offset}`;
110133
const rangeEnd = offset + limit - 1;
111-
const res = await fetch(url, {
112-
headers: {
113-
...HEADERS,
114-
Range: `${offset}-${rangeEnd}`,
115-
},
116-
});
134+
135+
// Node 18+ fetch() has no default timeout. Wire up AbortController so a
136+
// hung Supabase connection can't hang the whole backup run.
137+
const controller = new AbortController();
138+
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
139+
let res;
140+
try {
141+
res = await fetch(url, {
142+
headers: {
143+
...HEADERS,
144+
Range: `${offset}-${rangeEnd}`,
145+
},
146+
signal: controller.signal,
147+
});
148+
} catch (err) {
149+
if (err && err.name === "AbortError") {
150+
throw new Error(
151+
`PostgREST request for ${table} timed out after ${FETCH_TIMEOUT_MS} ms ` +
152+
`(raise FETCH_TIMEOUT_MS if this table is legitimately slow)`
153+
);
154+
}
155+
throw err;
156+
} finally {
157+
clearTimeout(timer);
158+
}
159+
160+
if (res.status === 404) {
161+
// PostgREST returns 404 with `code: "PGRST205"` when the table is not in
162+
// the schema cache. Any other 404 (typo in SUPABASE_URL, paused project,
163+
// wrong schema, custom API gateway) should surface loudly, not be
164+
// silently treated as "table missing" -- that's how backup tools lose
165+
// data without anyone noticing.
166+
const rawBody = await res.text();
167+
let parsed = null;
168+
try { parsed = JSON.parse(rawBody); } catch {}
169+
if (parsed && parsed.code === "PGRST205") {
170+
return { rows: [], total: null, missing: true };
171+
}
172+
throw new Error(`PostgREST error 404 on ${table}: ${rawBody}`);
173+
}
117174

118175
if (!res.ok && res.status !== 206) {
119176
const body = await res.text();
@@ -132,54 +189,89 @@ async function fetchPage(table, orderBy, offset, limit) {
132189
}
133190

134191
/** Export one table, streaming rows to disk. */
135-
async function exportTable(tableName, orderBy, backupDir, dateStr) {
192+
async function exportTable(tableName, orderBy, backupDir, dateStr, required) {
136193
const filePath = path.join(backupDir, `${tableName}-${dateStr}.json`);
194+
// Write to a sibling .tmp file and atomically rename on success. Any crash
195+
// (network error, process kill) leaves only the .tmp behind, so yesterday's
196+
// valid backup is never overwritten by today's partial one.
197+
const tmpPath = `${filePath}.tmp`;
137198
let offset = 0;
138199
let total = null;
139200
let rowCount = 0;
140201

141202
const first = await fetchPage(tableName, orderBy, 0, PAGE_SIZE);
142-
total = first.total;
143203

144204
const label = ` ${tableName}`;
145-
if (first.rows.length === 0) {
146-
process.stdout.write(`${label}: 0 rows (empty table)\n`);
147-
fs.writeFileSync(filePath, "[]");
148-
return { rowCount: 0, filePath, fileSize: 2 };
205+
if (first.missing) {
206+
if (required) {
207+
throw new Error(`Required table "${tableName}" not found in Supabase project`);
208+
}
209+
process.stdout.write(`${label}: skipped (table not present)\n`);
210+
return { rowCount: 0, filePath: null, fileSize: 0, skipped: true };
149211
}
150212

151-
const fd = fs.openSync(filePath, "w");
152-
fs.writeSync(fd, "[\n");
153-
let firstRow = true;
213+
total = first.total;
154214

155-
function writeRows(rows) {
156-
for (const row of rows) {
157-
if (!firstRow) fs.writeSync(fd, ",\n");
158-
fs.writeSync(fd, JSON.stringify(row));
159-
firstRow = false;
160-
rowCount++;
215+
if (first.rows.length === 0) {
216+
process.stdout.write(`${label}: 0 rows (empty table)\n`);
217+
// Even the two-byte "[]" path writes via tmp+rename so we never leave a
218+
// half-written file in the final location.
219+
try {
220+
fs.writeFileSync(tmpPath, "[]");
221+
fs.renameSync(tmpPath, filePath);
222+
} catch (err) {
223+
try { fs.unlinkSync(tmpPath); } catch {}
224+
throw err;
161225
}
226+
return { rowCount: 0, filePath, fileSize: 2 };
162227
}
163228

164-
writeRows(first.rows);
165-
process.stdout.write(
166-
`${label}: ${rowCount}${total != null ? "/" + total : ""} rows\r`
167-
);
168-
169-
offset = PAGE_SIZE;
170-
while (first.rows.length === PAGE_SIZE && (total == null || offset < total)) {
171-
const page = await fetchPage(tableName, orderBy, offset, PAGE_SIZE);
172-
if (page.rows.length === 0) break;
173-
writeRows(page.rows);
174-
offset += page.rows.length;
229+
const fd = fs.openSync(tmpPath, "w");
230+
let closed = false;
231+
try {
232+
fs.writeSync(fd, "[\n");
233+
let firstRow = true;
234+
235+
function writeRows(rows) {
236+
for (const row of rows) {
237+
if (!firstRow) fs.writeSync(fd, ",\n");
238+
fs.writeSync(fd, JSON.stringify(row));
239+
firstRow = false;
240+
rowCount++;
241+
}
242+
}
175243

244+
writeRows(first.rows);
176245
process.stdout.write(
177246
`${label}: ${rowCount}${total != null ? "/" + total : ""} rows\r`
178247
);
179-
}
180248

181-
fs.writeSync(fd, "\n]");
182-
fs.closeSync(fd);
249+
let lastPageSize = first.rows.length;
250+
offset = PAGE_SIZE;
251+
while (lastPageSize === PAGE_SIZE && (total == null || offset < total)) {
252+
const page = await fetchPage(tableName, orderBy, offset, PAGE_SIZE);
253+
lastPageSize = page.rows.length;
254+
if (lastPageSize === 0) break;
255+
writeRows(page.rows);
256+
offset += lastPageSize;
257+
258+
process.stdout.write(
259+
`${label}: ${rowCount}${total != null ? "/" + total : ""} rows\r`
260+
);
261+
}
262+
263+
fs.writeSync(fd, "\n]");
264+
fs.closeSync(fd);
265+
closed = true;
266+
267+
fs.renameSync(tmpPath, filePath);
268+
} catch (err) {
269+
if (!closed) {
270+
try { fs.closeSync(fd); } catch {}
271+
}
272+
try { fs.unlinkSync(tmpPath); } catch {}
273+
throw err;
274+
}
183275

184276
const fileSize = fs.statSync(filePath).size;
185277

@@ -209,7 +301,7 @@ async function main() {
209301
const results = [];
210302
for (const table of TABLES) {
211303
try {
212-
const result = await exportTable(table.name, table.orderBy, backupDir, dateStr);
304+
const result = await exportTable(table.name, table.orderBy, backupDir, dateStr, table.required);
213305
results.push({ table: table.name, ...result });
214306
} catch (err) {
215307
console.error(`\n ERROR exporting ${table.name}: ${err.message}`);

0 commit comments

Comments
 (0)