Skip to content

Commit 7dab7b6

Browse files
committed
use rclone image for snapshot backups
1 parent bf6a3cd commit 7dab7b6

File tree

5 files changed

+550
-151
lines changed

5 files changed

+550
-151
lines changed

api/v1alpha1/memgraphcluster_types.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,18 @@ type SnapshotSpec struct {
179179
// +optional
180180
RetentionCount int32 `json:"retentionCount,omitempty"`
181181

182+
// ServiceAccountName is the Kubernetes service account to use for the snapshot CronJob pod.
183+
// Required for both S3 (IRSA on EKS) and GCS (Workload Identity on GKE).
184+
// +optional
185+
ServiceAccountName string `json:"serviceAccountName,omitempty"`
186+
182187
// S3 defines optional S3 backup configuration
183188
// +optional
184189
S3 *S3BackupSpec `json:"s3,omitempty"`
190+
191+
// GCS defines optional GCS backup configuration
192+
// +optional
193+
GCS *GCSBackupSpec `json:"gcs,omitempty"`
185194
}
186195

187196
// S3BackupSpec defines S3 backup configuration
@@ -219,6 +228,22 @@ type S3BackupSpec struct {
219228
RetentionDays int32 `json:"retentionDays,omitempty"`
220229
}
221230

231+
// GCSBackupSpec holds the settings for uploading snapshots to a Google Cloud
// Storage bucket: an on/off switch, the target bucket, and the object-path
// prefix under which snapshots are stored.
type GCSBackupSpec struct {
	// Enabled enables GCS backups
	// +optional
	Enabled bool `json:"enabled,omitempty"`

	// Bucket is the GCS bucket name
	// +optional
	Bucket string `json:"bucket,omitempty"`

	// Prefix is the path prefix within the bucket
	// +kubebuilder:default="memgraph/snapshots"
	// +optional
	Prefix string `json:"prefix,omitempty"`
}
246+
222247
// MemgraphClusterStatus defines the observed state of MemgraphCluster
223248
type MemgraphClusterStatus struct {
224249
// Phase is the current phase of the cluster
@@ -249,6 +274,10 @@ type MemgraphClusterStatus struct {
249274
// +optional
250275
LastS3BackupTime *metav1.Time `json:"lastS3BackupTime,omitempty"`
251276

277+
// LastGCSBackupTime is the time of the last successful GCS backup
278+
// +optional
279+
LastGCSBackupTime *metav1.Time `json:"lastGCSBackupTime,omitempty"`
280+
252281
// Validation contains real-time validation test results
253282
// +optional
254283
Validation *ValidationStatus `json:"validation,omitempty"`

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/memgraph.base14.io_memgraphclusters.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,20 @@ spec:
11291129
default: true
11301130
description: Enabled enables periodic snapshots
11311131
type: boolean
1132+
gcs:
1133+
description: GCS defines optional GCS backup configuration
1134+
properties:
1135+
bucket:
1136+
description: Bucket is the GCS bucket name
1137+
type: string
1138+
enabled:
1139+
description: Enabled enables GCS backups
1140+
type: boolean
1141+
prefix:
1142+
default: memgraph/snapshots
1143+
description: Prefix is the path prefix within the bucket
1144+
type: string
1145+
type: object
11321146
retentionCount:
11331147
default: 5
11341148
description: RetentionCount is the number of snapshots to retain
@@ -1183,6 +1197,11 @@ spec:
11831197
default: '*/15 * * * *'
11841198
description: Schedule is a cron expression for snapshot frequency
11851199
type: string
1200+
serviceAccountName:
1201+
description: |-
1202+
ServiceAccountName is the Kubernetes service account to use for the snapshot CronJob pod.
1203+
Required for both S3 (IRSA on EKS) and GCS (Workload Identity on GKE).
1204+
type: string
11861205
type: object
11871206
storage:
11881207
description: Storage defines the persistent storage configuration
@@ -1302,6 +1321,11 @@ spec:
13021321
- type
13031322
type: object
13041323
type: array
1324+
lastGCSBackupTime:
1325+
description: LastGCSBackupTime is the time of the last successful
1326+
GCS backup
1327+
format: date-time
1328+
type: string
13051329
lastS3BackupTime:
13061330
description: LastS3BackupTime is the time of the last successful S3
13071331
backup

internal/controller/snapshot.go

Lines changed: 97 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ import (
2121
)
2222

2323
const (
24-
// Default AWS CLI image for S3 uploads
25-
defaultAWSCLIImage = "amazon/aws-cli:latest"
24+
// Default rclone image for S3/GCS uploads
25+
defaultRcloneImage = "rclone/rclone:1.73.0"
2626

2727
// Shared volume name for snapshot data between containers
2828
snapshotDataVolume = "snapshot-data"
@@ -104,7 +104,8 @@ func buildSnapshotCronJob(cluster *memgraphv1alpha1.MemgraphCluster) *batchv1.Cr
104104
Labels: labelsForCluster(cluster),
105105
},
106106
Spec: corev1.PodSpec{
107-
RestartPolicy: corev1.RestartPolicyOnFailure,
107+
RestartPolicy: corev1.RestartPolicyOnFailure,
108+
ServiceAccountName: cluster.Spec.Snapshot.ServiceAccountName,
108109
SecurityContext: &corev1.PodSecurityContext{
109110
RunAsUser: &runAsUser,
110111
RunAsGroup: &runAsGroup,
@@ -151,8 +152,8 @@ echo "Snapshot created successfully at $(date)"
151152
},
152153
}
153154

154-
// Init container 2: Copy snapshot files to shared volume (if S3 enabled)
155-
if cluster.Spec.Snapshot.S3 != nil && cluster.Spec.Snapshot.S3.Enabled {
155+
// Init container 2: Copy snapshot files to shared volume (if S3 or GCS enabled)
156+
if isRemoteBackupEnabled(cluster) {
156157
// Use bitnami/kubectl for copying files from the main pod
157158
copyCmd := fmt.Sprintf(`
158159
set -e
@@ -202,12 +203,14 @@ ls -la /snapshot-data/snapshots/
202203

203204
// buildSnapshotMainContainers builds the main containers for the snapshot job
204205
func buildSnapshotMainContainers(cluster *memgraphv1alpha1.MemgraphCluster) []corev1.Container {
205-
// If S3 is enabled, main container uploads to S3
206206
if cluster.Spec.Snapshot.S3 != nil && cluster.Spec.Snapshot.S3.Enabled {
207-
return []corev1.Container{buildS3UploadContainer(cluster)}
207+
return []corev1.Container{buildRcloneUploadContainer(cluster, "s3")}
208+
}
209+
210+
if cluster.Spec.Snapshot.GCS != nil && cluster.Spec.Snapshot.GCS.Enabled {
211+
return []corev1.Container{buildRcloneUploadContainer(cluster, "gcs")}
208212
}
209213

210-
// Otherwise, just a completion container
211214
return []corev1.Container{
212215
{
213216
Name: "complete",
@@ -224,50 +227,42 @@ func buildSnapshotMainContainers(cluster *memgraphv1alpha1.MemgraphCluster) []co
224227
}
225228
}
226229

227-
// buildS3UploadContainer builds the S3 upload container
228-
func buildS3UploadContainer(cluster *memgraphv1alpha1.MemgraphCluster) corev1.Container {
229-
s3 := cluster.Spec.Snapshot.S3
230-
prefix := s3.Prefix
231-
if prefix == "" {
232-
prefix = "memgraph/snapshots"
230+
// isRemoteBackupEnabled returns true if either S3 or GCS backup is enabled
231+
func isRemoteBackupEnabled(cluster *memgraphv1alpha1.MemgraphCluster) bool {
232+
if cluster.Spec.Snapshot.S3 != nil && cluster.Spec.Snapshot.S3.Enabled {
233+
return true
233234
}
235+
if cluster.Spec.Snapshot.GCS != nil && cluster.Spec.Snapshot.GCS.Enabled {
236+
return true
237+
}
238+
return false
239+
}
234240

235-
// Build S3 upload command
236-
s3Cmd := fmt.Sprintf(`
237-
set -e
238-
239-
TIMESTAMP=$(cat /snapshot-data/timestamp)
240-
BACKUP_PATH="s3://%s/%s/%s/${TIMESTAMP}"
241-
242-
echo "Uploading snapshot to ${BACKUP_PATH}..."
243-
244-
# Configure endpoint if specified
245-
%s
246-
247-
# Upload to S3
248-
if [ -d "/snapshot-data/snapshots" ] && [ "$(ls -A /snapshot-data/snapshots 2>/dev/null)" ]; then
249-
aws s3 cp /snapshot-data/snapshots/ ${BACKUP_PATH}/snapshots/ --recursive
250-
echo "Snapshot uploaded successfully to ${BACKUP_PATH}"
251-
else
252-
echo "No snapshot files found to upload"
253-
exit 1
254-
fi
241+
// buildRcloneUploadContainer builds the rclone upload container for S3 or GCS
242+
func buildRcloneUploadContainer(cluster *memgraphv1alpha1.MemgraphCluster, backend string) corev1.Container {
243+
var rcloneCmd string
244+
var envVars []corev1.EnvVar
255245

256-
echo "S3 backup completed at $(date)"
257-
`, s3.Bucket, prefix, cluster.Name, buildS3EndpointConfig(s3))
246+
switch backend {
247+
case "s3":
248+
rcloneCmd = buildRcloneS3Command(cluster)
249+
envVars = buildS3Env(cluster)
250+
case "gcs":
251+
rcloneCmd = buildRcloneGCSCommand(cluster)
252+
}
258253

259254
return corev1.Container{
260-
Name: "s3-upload",
261-
Image: defaultAWSCLIImage,
255+
Name: "rclone-upload",
256+
Image: defaultRcloneImage,
262257
Command: []string{"/bin/sh", "-c"},
263-
Args: []string{s3Cmd},
258+
Args: []string{rcloneCmd},
264259
SecurityContext: &corev1.SecurityContext{
265260
AllowPrivilegeEscalation: ptr(false),
266261
Capabilities: &corev1.Capabilities{
267262
Drop: []corev1.Capability{"ALL"},
268263
},
269264
},
270-
Env: buildS3Env(cluster),
265+
Env: envVars,
271266
VolumeMounts: []corev1.VolumeMount{
272267
{
273268
Name: snapshotDataVolume,
@@ -277,12 +272,69 @@ echo "S3 backup completed at $(date)"
277272
}
278273
}
279274

275+
// buildRcloneS3Command builds the rclone command for S3 uploads
276+
func buildRcloneS3Command(cluster *memgraphv1alpha1.MemgraphCluster) string {
277+
s3 := cluster.Spec.Snapshot.S3
278+
prefix := s3.Prefix
279+
if prefix == "" {
280+
prefix = "memgraph/snapshots"
281+
}
282+
283+
endpointFlag := ""
284+
if s3.Endpoint != "" {
285+
endpointFlag = fmt.Sprintf("--s3-endpoint %s", s3.Endpoint)
286+
}
287+
288+
regionFlag := ""
289+
if s3.Region != "" {
290+
regionFlag = fmt.Sprintf("--s3-region %s", s3.Region)
291+
}
292+
293+
return fmt.Sprintf(`
294+
set -e
295+
TIMESTAMP=$(cat /snapshot-data/timestamp)
296+
DEST=":s3:%s/%s/%s/${TIMESTAMP}/snapshots"
297+
echo "Uploading snapshot to ${DEST}..."
298+
if [ -d "/snapshot-data/snapshots" ] && [ "$(ls -A /snapshot-data/snapshots 2>/dev/null)" ]; then
299+
rclone copy /snapshot-data/snapshots/ "${DEST}" --s3-provider AWS --s3-env-auth %s %s -v
300+
echo "Snapshot uploaded successfully"
301+
else
302+
echo "No snapshot files found to upload"
303+
exit 1
304+
fi
305+
echo "S3 backup completed at $(date)"
306+
`, s3.Bucket, prefix, cluster.Name, regionFlag, endpointFlag)
307+
}
308+
309+
// buildRcloneGCSCommand builds the rclone command for GCS uploads
310+
func buildRcloneGCSCommand(cluster *memgraphv1alpha1.MemgraphCluster) string {
311+
gcs := cluster.Spec.Snapshot.GCS
312+
prefix := gcs.Prefix
313+
if prefix == "" {
314+
prefix = "memgraph/snapshots"
315+
}
316+
317+
return fmt.Sprintf(`
318+
set -e
319+
TIMESTAMP=$(cat /snapshot-data/timestamp)
320+
DEST=":gcs:%s/%s/%s/${TIMESTAMP}/snapshots"
321+
echo "Uploading snapshot to ${DEST}..."
322+
if [ -d "/snapshot-data/snapshots" ] && [ "$(ls -A /snapshot-data/snapshots 2>/dev/null)" ]; then
323+
rclone copy /snapshot-data/snapshots/ "${DEST}" --gcs-env-auth -v
324+
echo "Snapshot uploaded successfully"
325+
else
326+
echo "No snapshot files found to upload"
327+
exit 1
328+
fi
329+
echo "GCS backup completed at $(date)"
330+
`, gcs.Bucket, prefix, cluster.Name)
331+
}
332+
280333
// buildSnapshotVolumes builds the volumes for the snapshot job
281334
func buildSnapshotVolumes(cluster *memgraphv1alpha1.MemgraphCluster) []corev1.Volume {
282335
var volumes []corev1.Volume
283336

284-
// Add shared volume if S3 is enabled
285-
if cluster.Spec.Snapshot.S3 != nil && cluster.Spec.Snapshot.S3.Enabled {
337+
if isRemoteBackupEnabled(cluster) {
286338
volumes = append(volumes, corev1.Volume{
287339
Name: snapshotDataVolume,
288340
VolumeSource: corev1.VolumeSource{
@@ -294,14 +346,6 @@ func buildSnapshotVolumes(cluster *memgraphv1alpha1.MemgraphCluster) []corev1.Vo
294346
return volumes
295347
}
296348

297-
// buildS3EndpointConfig builds AWS CLI endpoint configuration
298-
func buildS3EndpointConfig(s3 *memgraphv1alpha1.S3BackupSpec) string {
299-
if s3.Endpoint == "" {
300-
return ""
301-
}
302-
return fmt.Sprintf(`export AWS_ENDPOINT_URL="%s"`, s3.Endpoint)
303-
}
304-
305349
// buildS3Env builds environment variables for the S3 upload container
306350
func buildS3Env(cluster *memgraphv1alpha1.MemgraphCluster) []corev1.EnvVar {
307351
var envVars []corev1.EnvVar
@@ -312,7 +356,6 @@ func buildS3Env(cluster *memgraphv1alpha1.MemgraphCluster) []corev1.EnvVar {
312356

313357
s3 := cluster.Spec.Snapshot.S3
314358

315-
// Add S3 credentials if configured
316359
if s3.SecretRef != nil {
317360
envVars = append(envVars,
318361
corev1.EnvVar{
@@ -375,9 +418,10 @@ func (r *MemgraphClusterReconciler) reconcileSnapshotCronJob(ctx context.Context
375418
return err
376419
}
377420

378-
// Update if schedule or S3 config changed
421+
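// Update if the schedule, the number of main containers, or the service account changed.
// NOTE: other backup-config changes (bucket, prefix, switching S3 <-> GCS) are not compared here.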
379422
needsUpdate := existing.Spec.Schedule != desired.Spec.Schedule ||
380-
len(existing.Spec.JobTemplate.Spec.Template.Spec.Containers) != len(desired.Spec.JobTemplate.Spec.Template.Spec.Containers)
423+
len(existing.Spec.JobTemplate.Spec.Template.Spec.Containers) != len(desired.Spec.JobTemplate.Spec.Template.Spec.Containers) ||
424+
existing.Spec.JobTemplate.Spec.Template.Spec.ServiceAccountName != desired.Spec.JobTemplate.Spec.Template.Spec.ServiceAccountName
381425

382426
if needsUpdate {
383427
log.Info("updating snapshot CronJob",

0 commit comments

Comments
 (0)