This repository was archived by the owner on Jan 29, 2026. It is now read-only.

[Database] Implement Automated Backup System #73

@coderabbitai

Description

📊 Priority: MEDIUM - Production Readiness

Background

The .data/ directory containing all workflows, store state, and sessions has no backup mechanism. This creates a risk of data loss from accidental deletion, corruption, or disk failure.

Current State - No Backup Strategy

// backend/src/db/database.js
const DB_DIR = path.join(process.cwd(), '.data');
const WORKFLOWS_FILE = path.join(DB_DIR, 'workflows.json');
const STORE_STATE_FILE = path.join(DB_DIR, 'store-state.json');
const SESSIONS_FILE = path.join(DB_DIR, 'sessions.json');

// No backup functions exist

Data at Risk

  • Workflows: User-created workflow definitions with nodes/edges
  • Store State: Current UI state (viewport, selections, etc.)
  • Sessions: Active session data with authentication info
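
For illustration only, a single entry in workflows.json might look roughly like the sketch below; the field names are hypothetical and the real schema lives in the backend.

// Hypothetical workflow entry — actual field names may differ
{
  "id": "wf-1",
  "name": "Example workflow",
  "nodes": [{ "id": "n1", "type": "trigger", "position": { "x": 0, "y": 0 } }],
  "edges": [{ "id": "e1", "source": "n1", "target": "n2" }]
}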

Recommended Solution

Part 1: Backup Service

// backend/src/db/backup.js (NEW FILE)
import fs from 'fs/promises';
import path from 'path';
import { createGzip, createGunzip } from 'zlib';
import { pipeline } from 'stream/promises';
import { createReadStream, createWriteStream } from 'fs';
import { logger } from '../utils/logger.js';

const DB_DIR = path.join(process.cwd(), '.data');
const BACKUP_DIR = path.join(DB_DIR, 'backups');
const MAX_BACKUPS = parseInt(process.env.MAX_BACKUPS, 10) || 30;

/**
 * Create a timestamped backup of all database files
 */
export async function createBackup() {
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const backupName = `backup-${timestamp}`;
  const backupPath = path.join(BACKUP_DIR, backupName);
  
  try {
    // Ensure backup directory exists
    await fs.mkdir(BACKUP_DIR, { recursive: true });
    await fs.mkdir(backupPath, { recursive: true });
    
    const files = ['workflows.json', 'store-state.json', 'sessions.json'];
    const backedUp = [];
    
    for (const file of files) {
      const sourcePath = path.join(DB_DIR, file);
      const destPath = path.join(backupPath, file);
      const gzipPath = `${destPath}.gz`;
      
      // Check if source exists
      try {
        await fs.access(sourcePath);
      } catch {
        logger.warn({ file }, 'Backup source file not found, skipping');
        continue;
      }
      
      // Compress and copy
      await pipeline(
        createReadStream(sourcePath),
        createGzip(),
        createWriteStream(gzipPath)
      );
      
      backedUp.push(file);
    }
    
    // Write backup metadata
    const metadata = {
      timestamp: new Date().toISOString(),
      files: backedUp,
      version: process.env.npm_package_version
    };
    await fs.writeFile(
      path.join(backupPath, 'metadata.json'),
      JSON.stringify(metadata, null, 2)
    );
    
    logger.info({ backupName, files: backedUp }, 'Database backup created');
    
    // Clean up old backups
    await cleanOldBackups();
    
    return backupPath;
  } catch (error) {
    logger.error({ error }, 'Failed to create backup');
    throw error;
  }
}

/**
 * Restore database from a backup
 */
export async function restoreBackup(backupName) {
  const backupPath = path.join(BACKUP_DIR, backupName);
  
  try {
    // Verify backup exists
    await fs.access(backupPath);
    
    // Read metadata
    const metadata = JSON.parse(
      await fs.readFile(path.join(backupPath, 'metadata.json'), 'utf-8')
    );
    
    logger.info({ backupName, metadata }, 'Restoring from backup');
    
    for (const file of metadata.files) {
      const gzipPath = path.join(backupPath, `${file}.gz`);
      const destPath = path.join(DB_DIR, file);
      
      // Decompress and restore (createGunzip reverses the gzip applied at backup time)
      await pipeline(
        createReadStream(gzipPath),
        createGunzip(),
        createWriteStream(destPath)
      );
    }
    
    logger.info({ backupName }, 'Database restored successfully');
  } catch (error) {
    logger.error({ error, backupName }, 'Failed to restore backup');
    throw error;
  }
}

/**
 * List all available backups
 */
export async function listBackups() {
  try {
    const entries = await fs.readdir(BACKUP_DIR, { withFileTypes: true });
    const backups = [];
    
    for (const entry of entries) {
      if (entry.isDirectory() && entry.name.startsWith('backup-')) {
        const backupPath = path.join(BACKUP_DIR, entry.name);
        try {
          const metadata = JSON.parse(
            await fs.readFile(path.join(backupPath, 'metadata.json'), 'utf-8')
          );
          // Sum the sizes of the files inside the backup directory;
          // stat on the directory itself does not report its contents' size
          let size = 0;
          for (const file of await fs.readdir(backupPath)) {
            size += (await fs.stat(path.join(backupPath, file))).size;
          }
          backups.push({
            name: entry.name,
            timestamp: metadata.timestamp,
            files: metadata.files,
            size
          });
        } catch {
          // Skip backups with missing or unreadable metadata
        }
      }
    }
    
    return backups.sort((a, b) => 
      new Date(b.timestamp) - new Date(a.timestamp)
    );
  } catch (error) {
    logger.error({ error }, 'Failed to list backups');
    return [];
  }
}

/**
 * Delete old backups, keeping only MAX_BACKUPS
 */
async function cleanOldBackups() {
  const backups = await listBackups();
  
  if (backups.length > MAX_BACKUPS) {
    const toDelete = backups.slice(MAX_BACKUPS);
    
    for (const backup of toDelete) {
      const backupPath = path.join(BACKUP_DIR, backup.name);
      await fs.rm(backupPath, { recursive: true });
      logger.info({ backup: backup.name }, 'Old backup deleted');
    }
  }
}

/**
 * Get backup statistics
 */
export async function getBackupStats() {
  const backups = await listBackups();
  const totalSize = backups.reduce((sum, b) => sum + b.size, 0);
  
  return {
    count: backups.length,
    totalSize,
    oldest: backups[backups.length - 1]?.timestamp,
    newest: backups[0]?.timestamp
  };
}
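
For a quick manual check before wiring up the scheduler, the service can be exercised from a one-off script. This is a sketch only; the script path and import path are assumptions based on the layout described in this issue, and it relies on an ESM context with top-level await.

// scripts/backup-check.js (illustrative one-off script)
import { createBackup, listBackups, getBackupStats } from '../backend/src/db/backup.js';

const backupPath = await createBackup();
console.log('Backup written to', backupPath);

const backups = await listBackups();
console.log('Available backups:', backups.map(b => b.name));

console.log('Stats:', await getBackupStats());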

Part 2: Scheduled Backups

// backend/src/db/backupScheduler.js (NEW FILE)
import { createBackup } from './backup.js';
import { logger } from '../utils/logger.js';

const BACKUP_INTERVAL = parseInt(process.env.BACKUP_INTERVAL_HOURS, 10) || 24;

let backupInterval;

export function startBackupScheduler() {
  if (backupInterval) {
    logger.warn('Backup scheduler already running');
    return;
  }
  
  // Create initial backup on startup
  createBackup().catch(err => 
    logger.error({ err }, 'Initial backup failed')
  );
  
  // Schedule periodic backups
  backupInterval = setInterval(() => {
    createBackup().catch(err => 
      logger.error({ err }, 'Scheduled backup failed')
    );
  }, BACKUP_INTERVAL * 60 * 60 * 1000);
  
  logger.info({ intervalHours: BACKUP_INTERVAL }, 'Backup scheduler started');
}

export function stopBackupScheduler() {
  if (backupInterval) {
    clearInterval(backupInterval);
    backupInterval = null;
    logger.info('Backup scheduler stopped');
  }
}

Part 3: Integrate with Server Lifecycle

// backend/src/server.js
import { startBackupScheduler, stopBackupScheduler } from './db/backupScheduler.js';
import { createBackup } from './db/backup.js';

// Start scheduler after database initialization
await db.initialize();
startBackupScheduler();

// Backup on graceful shutdown
const shutdown = async (signal) => {
  console.log(`\n🛑 ${signal} received, shutting down gracefully...`);
  
  // Create final backup
  await createBackup().catch(err => 
    logger.error({ err }, 'Shutdown backup failed')
  );
  
  stopBackupScheduler();
  server.close();
  // ... rest of shutdown
};

Part 4: Backup API Endpoints (Optional)

// backend/src/api/routes/admin.js (NEW FILE)
import express from 'express';
import { authenticate } from '../middleware/auth.js';
import { asyncHandler } from '../middleware/errorHandler.js';
import * as backup from '../../db/backup.js';

const router = express.Router();

// List backups
router.get('/backups', 
  authenticate({ required: true }),
  asyncHandler(async (req, res) => {
    const backups = await backup.listBackups();
    const stats = await backup.getBackupStats();
    res.json({ success: true, data: { backups, stats } });
  })
);

// Create backup
router.post('/backups',
  authenticate({ required: true }),
  asyncHandler(async (req, res) => {
    const backupPath = await backup.createBackup();
    res.json({ success: true, data: { backupPath } });
  })
);

// Restore backup (dangerous - require confirmation)
router.post('/backups/:name/restore',
  authenticate({ required: true }),
  asyncHandler(async (req, res) => {
    if (req.body.confirm !== 'RESTORE') {
      return res.status(400).json({
        error: { message: 'Confirmation required: send {"confirm": "RESTORE"}' }
      });
    }
    await backup.restoreBackup(req.params.name);
    res.json({ success: true, message: 'Database restored. Restart recommended.' });
  })
);

export default router;
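
If the optional admin routes are added, they also need to be registered in server.js. A minimal sketch, assuming an Express app instance and the /api/admin prefix used by the curl examples below:

// backend/src/server.js (addition, sketch)
import adminRouter from './api/routes/admin.js';

app.use('/api/admin', adminRouter);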

Files to Create

  • backend/src/db/backup.js (new)
  • backend/src/db/backupScheduler.js (new)
  • backend/src/api/routes/admin.js (optional, new)

Files to Modify

  • backend/src/server.js (integrate backup scheduler)
  • .gitignore (add .data/backups/ to ignore list)
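
A sketch of the .gitignore addition:

# Local database backups (never commit)
.data/backups/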

Environment Variables

# Backup configuration
BACKUP_INTERVAL_HOURS=24  # Backup every 24 hours
MAX_BACKUPS=30            # Keep last 30 backups

Acceptance Criteria

  • Backup service creates compressed backups of all database files
  • Backup scheduler runs automatically at configured intervals
  • Backup created on server startup
  • Backup created on graceful shutdown
  • Old backups automatically cleaned up (keep last 30)
  • Restore functionality implemented and tested (a test sketch follows this list)
  • Backup metadata includes timestamp and file list
  • (Optional) Admin API endpoints for manual backup/restore
  • Documentation added for backup/restore procedures
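
One possible shape for the restore test, assuming Node's built-in node:test runner and that the suite runs in a scratch working directory where .data/ can be safely overwritten; file paths are assumptions, not part of this issue.

// backend/test/backup.test.js (illustrative sketch)
import { test } from 'node:test';
import assert from 'node:assert/strict';
import fs from 'fs/promises';
import path from 'path';
import { createBackup, restoreBackup, listBackups } from '../src/db/backup.js';

const DB_DIR = path.join(process.cwd(), '.data');

test('backup/restore round-trip preserves workflows.json', async () => {
  const workflowsFile = path.join(DB_DIR, 'workflows.json');
  const original = JSON.stringify({ workflows: [{ id: 'wf-1' }] });

  await fs.mkdir(DB_DIR, { recursive: true });
  await fs.writeFile(workflowsFile, original);

  await createBackup();

  // Simulate accidental overwrite, then restore the most recent backup
  await fs.writeFile(workflowsFile, '{"workflows":[]}');
  const [latest] = await listBackups(); // sorted newest first
  await restoreBackup(latest.name);

  assert.equal(await fs.readFile(workflowsFile, 'utf-8'), original);
});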

Manual Backup Commands

# Create manual backup via API
curl -X POST http://localhost:3001/api/admin/backups \
  -H "X-API-Key: $API_KEY"

# List backups
curl http://localhost:3001/api/admin/backups \
  -H "X-API-Key: $API_KEY"

# Restore backup
curl -X POST http://localhost:3001/api/admin/backups/backup-2024-01-20T10-30-00-000Z/restore \
  -H "X-API-Key: $API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"confirm": "RESTORE"}'

Backup Directory Structure

.data/
├── workflows.json
├── store-state.json
├── sessions.json
└── backups/
    ├── backup-2024-01-20T10-00-00-000Z/
    │   ├── workflows.json.gz
    │   ├── store-state.json.gz
    │   ├── sessions.json.gz
    │   └── metadata.json
    ├── backup-2024-01-21T10-00-00-000Z/
    │   └── ...
    └── ...

Additional Context

Implement this after resolving database atomicity issues (#68). Consider cloud backup integration for critical production deployments.
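
For cloud backup, one option is to push each compressed backup to object storage after createBackup() completes. A minimal sketch, assuming the @aws-sdk/client-s3 package and a hypothetical BACKUP_S3_BUCKET environment variable; this is outside the scope of this issue.

// backend/src/db/cloudBackup.js (illustrative sketch, not part of this issue)
import fs from 'fs/promises';
import path from 'path';
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';

const s3 = new S3Client({}); // region/credentials come from the standard AWS env vars

export async function uploadBackupToS3(backupPath) {
  const bucket = process.env.BACKUP_S3_BUCKET; // hypothetical variable
  const backupName = path.basename(backupPath);

  // Upload every file in the backup directory under a key prefixed with the backup name
  for (const file of await fs.readdir(backupPath)) {
    const body = await fs.readFile(path.join(backupPath, file));
    await s3.send(new PutObjectCommand({
      Bucket: bucket,
      Key: `${backupName}/${file}`,
      Body: body
    }));
  }
}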
