# Disaster Recovery Example
# This example demonstrates:
# - Taking a backup before maintenance
# - Performing a full restore
# - Point-in-time recovery
# - Cross-region restore
---
# SCENARIO 1: Pre-maintenance Backup
---
# Take a manual backup before maintenance window
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jBackup
metadata:
  name: pre-maintenance-backup
  namespace: neo4j-production
spec:
  target:
    kind: Cluster
    name: production-cluster
    namespace: neo4j-production
  # One-time backup (no schedule)
  storage:
    type: pvc
    pvc:
      name: maintenance-backup-pvc
      size: 50Gi
      storageClassName: fast-ssd
  options:
    backupType: "FULL"
    compress: true
    verifyBackup: true  # Verify integrity after backup
  # Add metadata for tracking
  # NOTE(review): nested under spec to avoid a duplicate top-level
  # `metadata` key (invalid YAML) — confirm placement against the
  # Neo4jBackup CRD schema.
  metadata:
    labels:
      purpose: maintenance
      timestamp: "2025-01-20-1400"
    annotations:
      description: "Pre-maintenance backup before Neo4j upgrade"
      operator: "admin@company.com"
---
# SCENARIO 2: Full Database Restore
---
# Restore entire cluster from backup
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jRestore
metadata:
  name: full-cluster-restore
  namespace: neo4j-production
spec:
  targetCluster: production-cluster
  # Restore all databases
  databaseName: "*"
  source:
    type: backup
    backupRef: pre-maintenance-backup
  # Force overwrite existing data
  force: true
  options:
    verifyBackup: true
    additionalArgs:
      - "--expand-commands"  # Show detailed progress
  # Pre-restore validation
  preRestoreHooks:
    - type: script
      script: |
        #!/bin/bash
        echo "Starting full cluster restore..."
        echo "Target cluster: production-cluster"
        echo "Source backup: pre-maintenance-backup"
        # Verify cluster is ready for restore
        kubectl get neo4jenterprisecluster production-cluster -n neo4j-production
---
# SCENARIO 3: Point-in-Time Recovery
---
# Restore to specific point in time
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jRestore
metadata:
  name: pitr-restore
  namespace: neo4j-production
spec:
  targetCluster: production-cluster
  databaseName: maindb
  source:
    type: backup
    backupRef: daily-backup-20250119
    # Restore to specific timestamp
    # NOTE(review): placed under source per the original key order —
    # confirm whether the CRD expects pointInTime at spec level instead.
    pointInTime: "2025-01-19T14:30:00Z"
  # Create as new database to compare
  options:
    targetDatabaseName: "maindb_pitr_recovery"
    verifyBackup: true
  postRestoreHooks:
    - type: script
      script: |
        echo "PITR restore completed"
        echo "Original database: maindb"
        echo "Recovered database: maindb_pitr_recovery"
        echo "Point in time: 2025-01-19T14:30:00Z"
---
# SCENARIO 4: Cross-Region Disaster Recovery
---
# Source region backup configuration
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jBackup
metadata:
  name: cross-region-backup
  namespace: neo4j-production
spec:
  target:
    kind: Cluster
    name: production-cluster
    namespace: neo4j-production
  # Backup to S3 with cross-region replication
  storage:
    type: s3
    bucket: neo4j-dr-backups
    path: "us-east-1/production"
    cloud:
      credentialsSecret: s3-dr-credentials
      region: us-east-1
  schedule:
    cron: "0 */4 * * *"  # Every 4 hours
  options:
    backupType: "AUTO"
    compress: true
    encryption:
      enabled: true
      kmsKeyId: "arn:aws:kms:us-east-1:123456789:key/xxxxx"
---
# Target region restore configuration
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jRestore
metadata:
  name: cross-region-restore
  namespace: neo4j-dr
spec:
  # Restore to DR cluster in different region
  targetCluster: dr-cluster
  databaseName: "*"
  source:
    type: s3
    bucket: neo4j-dr-backups
    path: "us-east-1/production/backup-20250120-0800"
    cloud:
      credentialsSecret: s3-dr-credentials
      region: us-west-2  # DR region
  force: true
  options:
    verifyBackup: true
    parallelRestore: true  # Speed up large restores
  postRestoreHooks:
    - type: script
      # NOTE(review): assumes NEO4J_PASSWORD is present in the hook's
      # environment — verify how the operator injects it.
      script: |
        echo "Cross-region restore completed"
        echo "Source region: us-east-1"
        echo "Target region: us-west-2"
        # Verify databases are online
        kubectl exec -n neo4j-dr dr-cluster-0 -- cypher-shell \
          -u neo4j -p $NEO4J_PASSWORD \
          "SHOW DATABASES"
---
# SCENARIO 5: Selective Database Restore
---
# Restore only specific database
apiVersion: neo4j.neo4j.com/v1alpha1
kind: Neo4jRestore
metadata:
  name: selective-restore
  namespace: neo4j-production
spec:
  targetCluster: production-cluster
  # Restore only analytics database
  databaseName: analytics
  source:
    type: backup
    backupRef: weekly-full-backup-20250114
  # Don't overwrite, create as new
  force: false
  options:
    targetDatabaseName: "analytics_restored"
  # Validation queries after restore
  postRestoreHooks:
    - type: cypher
      database: analytics_restored
      queries:
        - "MATCH (n) RETURN count(n) as nodeCount"
        - "MATCH ()-[r]->() RETURN count(r) as relationshipCount"
---
# SCENARIO 6: Backup Validation Job
---
# Regular backup validation to ensure recoverability
apiVersion: batch/v1
kind: CronJob
metadata:
  name: backup-validation
  namespace: neo4j-production
spec:
  schedule: "0 6 * * 1"  # Weekly on Monday at 6 AM
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: validator
              # NOTE(review): this image may not ship kubectl, which the
              # script below requires — verify or switch to a kubectl image.
              image: neo4j/neo4j-admin:5.26.0-enterprise
              command:
                - /bin/bash
                - -c
                - |
                  # List recent backups
                  echo "=== Recent Backups ==="
                  kubectl get neo4jbackup -n neo4j-production
                  # Get latest backup (last name in the Completed list)
                  LATEST_BACKUP=$(kubectl get neo4jbackup -n neo4j-production \
                    -o jsonpath='{.items[?(@.status.phase=="Completed")].metadata.name}' | \
                    awk '{print $NF}')
                  echo "Latest backup: $LATEST_BACKUP"
                  # Create test restore (dry run only)
                  cat <<EOF | kubectl apply -f -
                  apiVersion: neo4j.neo4j.com/v1alpha1
                  kind: Neo4jRestore
                  metadata:
                    name: validation-restore-$(date +%s)
                    namespace: neo4j-production
                  spec:
                    targetCluster: test-restore-cluster
                    databaseName: "*"
                    source:
                      type: backup
                      backupRef: $LATEST_BACKUP
                    options:
                      verifyBackup: true
                      dryRun: true  # Validation only
                  EOF
          restartPolicy: OnFailure
---
# SCENARIO 7: Automated Failover Configuration
---
# Configure automatic failover with backup/restore
apiVersion: v1
kind: ConfigMap
metadata:
  name: failover-config
  namespace: neo4j-production
data:
  failover.sh: |
    #!/bin/bash
    # Automated failover script
    PRIMARY_CLUSTER="production-cluster"
    DR_CLUSTER="dr-cluster"
    NAMESPACE="neo4j-production"
    # Check primary cluster health
    PRIMARY_READY=$(kubectl get neo4jenterprisecluster $PRIMARY_CLUSTER \
      -n $NAMESPACE -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')
    if [ "$PRIMARY_READY" != "True" ]; then
      echo "Primary cluster unhealthy, initiating failover..."
      # Find latest backup (newest by creation timestamp)
      LATEST_BACKUP=$(kubectl get neo4jbackup -n $NAMESPACE \
        --sort-by=.metadata.creationTimestamp \
        -o jsonpath='{.items[-1].metadata.name}')
      # Create restore to DR cluster
      # (heredoc body starts at column 0 of the script so the generated
      # manifest is valid top-level YAML; EOF must stay unindented)
      kubectl apply -f - <<EOF
    apiVersion: neo4j.neo4j.com/v1alpha1
    kind: Neo4jRestore
    metadata:
      name: failover-restore-$(date +%s)
      namespace: $NAMESPACE
    spec:
      targetCluster: $DR_CLUSTER
      databaseName: "*"
      source:
        type: backup
        backupRef: $LATEST_BACKUP
      force: true
      options:
        priority: high
    EOF
      # Update DNS/Load balancer to point to DR
      # ... DNS update logic ...
      echo "Failover initiated to DR cluster"
    fi