Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions charts/dremio_v2/templates/backup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Namespaced permissions for the `dremio-jobs` service account, which the
# backup CronJob uses to run `kubectl exec` against the Dremio master pod.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: dremio-jobs-role
  namespace: {{ .Release.Namespace }}
rules:
  # Read-only access to pods so kubectl can look up the exec target.
  # `create` is deliberately NOT granted here: the jobs never need to
  # create pods, only to exec into an existing one.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
  # Opening an exec session is a request against the pods/exec subresource.
  - apiGroups: [""]
    resources: ["pods/exec"]
    verbs: ["get", "create"]
---
# Identity the job pods run as; it is bound to `dremio-jobs-role` by the
# RoleBinding of the same name.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: {{ .Release.Namespace }}
  name: dremio-jobs
---
# Grants the rules of `dremio-jobs-role` to the `dremio-jobs` service account
# within the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: {{ .Release.Namespace }}
  name: dremio-jobs
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: dremio-jobs-role
subjects:
  - kind: ServiceAccount
    namespace: {{ .Release.Namespace }}
    name: dremio-jobs
---
# Periodic Dremio backup: execs `dremio-admin backup` inside the master
# coordinator container and writes the result to the distributed-storage
# bucket, under the same path the cleanup job later scans.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dremio-backup-s3
  namespace: {{ .Release.Namespace }}
spec:
  # Minute 0 of every third hour.
  schedule: "0 */3 * * *"
  failedJobsHistoryLimit: 1
  successfulJobsHistoryLimit: 1
  # Never start a new backup while the previous run is still in progress.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          # Service account bound to dremio-jobs-role (pods get + pods/exec).
          serviceAccountName: dremio-jobs
          restartPolicy: Never
          containers:
            - name: backup
              # NOTE(review): `latest` is a floating tag and, combined with
              # IfNotPresent, each node keeps whatever version it pulled
              # first. Pin a specific kubectl version once one is chosen.
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                # The destination path comes from distStorage.aws.backup.path
                # (slashes trimmed) so this job and the cleanup job always
                # agree on where backups live. With the default value
                # "backup/daily/" this renders exactly ".../backup/daily".
                - >-
                  kubectl exec dremio-master-0
                  --container dremio-master-coordinator --
                  /opt/dremio/bin/dremio-admin backup -l
                  -d dremioS3:///{{ $.Values.distStorage.aws.bucketName }}/{{ trimAll "/" $.Values.distStorage.aws.backup.path }}
150 changes: 150 additions & 0 deletions charts/dremio_v2/templates/cleanup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
---
# Holds the Python script that the dremio-backup-cleanup CronJob mounts and
# runs to prune old backups from object storage.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cleanup-backups
  # Explicit namespace, consistent with every other resource in this chart.
  namespace: {{ .Release.Namespace }}
data:
  cleanup.py: |
import os
import boto3
import logging
from datetime import datetime, timedelta
from botocore.client import Config

# Configure logging: emit INFO and above, and expose the root logger as the
# module-level `logger` that every function in this script writes to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Create S3 client for object storage
def create_s3_client(endpoint, access_key, secret_key, region):
    """Return a boto3 S3 client for the given endpoint and credentials.

    Args:
        endpoint: URL of the S3-compatible service.
        access_key: Access key id used to sign requests.
        secret_key: Secret access key used to sign requests.
        region: Region name passed to the client.

    Raises:
        Exception: Whatever boto3 raised while building the client; the
            error is logged first and then re-raised unchanged.
    """
    try:
        # Requests are signed with Signature Version 4.
        return boto3.session.Session().client(
            's3',
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            config=Config(signature_version='s3v4'),
            region_name=region,
        )
    except Exception as err:
        logger.error(f"Failed to create S3 client: {err}")
        raise

# Generate S3 keys
def generate_s3_keys(bucket, s3_client, prefix='/', delimiter='/', start_after=''):
    """Lazily yield the key of every object listed under ``prefix``.

    Args:
        bucket: Name of the bucket to list.
        s3_client: Client object providing ``get_paginator``.
        prefix: Key prefix to list; leading ``delimiter`` characters are
            stripped before it is sent.
        delimiter: Character stripped from the front of ``prefix``.
        start_after: Key to start listing after; when empty, the stripped
            prefix is used instead.

    Yields:
        Each object key, in the order the pages return them.

    Raises:
        Exception: Any listing error, logged and then re-raised.
    """
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        key_prefix = prefix.lstrip(delimiter)
        first_key = start_after if start_after else key_prefix
        pages = paginator.paginate(
            Bucket=bucket, Prefix=key_prefix, StartAfter=first_key
        )
        for page in pages:
            # A page without 'Contents' simply contributes no keys.
            yield from (entry['Key'] for entry in page.get('Contents', []))
    except Exception as err:
        logger.error(f"Failed to list objects: {err}")
        raise

# Cleanup backups older than the threshold date
def cleanup_backups(s3_client, bucket, obj, threshold_date, folder_index=2):
    """Delete one object if the backup folder it belongs to is too old.

    The backup folder is the ``/``-separated component of ``obj`` at
    position ``folder_index``. Its creation date is read from the
    second-to-last ``_``-separated token of the folder name, which must be
    formatted ``YYYY-MM-DD``.

    Args:
        s3_client: Client object providing ``delete_object``.
        bucket: Name of the bucket holding ``obj``.
        obj: Full object key.
        threshold_date: ``datetime``; objects whose folder date is strictly
            earlier than this are deleted.
        folder_index: Position of the backup folder within the key. The
            default of 2 matches a two-level prefix such as
            ``backup/daily/``.

    Keys whose folder name cannot be parsed are skipped with a warning.
    Errors raised by ``delete_object`` itself are not swallowed.
    """
    try:
        folder_name = obj.split('/')[folder_index]
        creation_date = datetime.strptime(folder_name.split('_')[-2], '%Y-%m-%d')
    except (IndexError, ValueError) as e:
        logger.warning(f"Skipping invalid folder name: {obj} {e}")
        return

    if creation_date < threshold_date:
        s3_client.delete_object(Bucket=bucket, Key=obj)
        # One call removes a single object, not a whole folder.
        logger.info(f"Deleted object: {obj}")


def _folder_index(folder_prefix):
    """Return the key position of the backup folders below ``folder_prefix``.

    Counts the non-empty path components of the prefix, so ``backup/daily/``
    (with or without surrounding slashes) gives 2.
    """
    return len([part for part in folder_prefix.split('/') if part])


def main():
    """Prune old backups according to the settings in the environment.

    Reads ENDPOINT, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, BUCKET_NAME,
    FOLDER_PREFIX, REGION_NAME, NUMBER_DAYS_MAX_TO_KEEP and
    NUMBER_BACKUPS_TO_KEEP. When more than NUMBER_BACKUPS_TO_KEEP distinct
    backup folders exist under FOLDER_PREFIX, every object whose folder is
    older than NUMBER_DAYS_MAX_TO_KEEP days is deleted.

    Raises:
        SystemExit: With code 1 when a variable is missing or a retention
            setting is not a non-negative integer, so the job is reported
            as failed instead of silently succeeding.
    """
    required = (
        'ENDPOINT',
        'S3_ACCESS_KEY_ID',
        'S3_SECRET_ACCESS_KEY',
        'BUCKET_NAME',
        'FOLDER_PREFIX',
        'REGION_NAME',
        'NUMBER_DAYS_MAX_TO_KEEP',
        'NUMBER_BACKUPS_TO_KEEP',
    )
    settings = {name: os.getenv(name) for name in required}

    # Validate the raw strings BEFORE converting anything: int(None) would
    # otherwise crash ahead of this check, and a legitimate "0" must not be
    # mistaken for a missing value.
    missing = [name for name in required if not settings[name]]
    if missing:
        logger.error(
            f"Missing one or more required environment variables: {', '.join(missing)}"
        )
        raise SystemExit(1)

    try:
        number_days_max_to_keep = int(settings['NUMBER_DAYS_MAX_TO_KEEP'])
        number_backups_to_keep = int(settings['NUMBER_BACKUPS_TO_KEEP'])
    except ValueError as e:
        logger.error(f"Retention settings must be integers: {e}")
        raise SystemExit(1)

    # A negative age would move the threshold into the future and delete
    # every backup; refuse it outright.
    if number_days_max_to_keep < 0 or number_backups_to_keep < 0:
        logger.error("Retention settings must not be negative")
        raise SystemExit(1)

    storage_endpoint = settings['ENDPOINT']
    storage_access_key = settings['S3_ACCESS_KEY_ID']
    storage_secret_key = settings['S3_SECRET_ACCESS_KEY']
    bucket_name = settings['BUCKET_NAME']
    folder_prefix = settings['FOLDER_PREFIX']
    region_name = settings['REGION_NAME']

    # Calculate the threshold date
    threshold_date = datetime.now() - timedelta(days=number_days_max_to_keep)

    # Create S3 client
    s3_client = create_s3_client(storage_endpoint, storage_access_key, storage_secret_key, region_name)

    # Backup folders sit directly below the prefix, whatever its depth.
    folder_index = _folder_index(folder_prefix)

    # Retrieve and process objects
    all_objects = []
    unique_folders = set()
    for obj in generate_s3_keys(bucket_name, s3_client, prefix=folder_prefix):
        all_objects.append(obj)
        parts = obj.split('/')
        # Keys too short to contain a folder component are not counted as
        # backups (and no longer crash the run).
        if len(parts) > folder_index and parts[folder_index]:
            unique_folders.add(parts[folder_index])

    folder_count = len(unique_folders)
    logger.info(f'Total number of backups in {folder_prefix}: {folder_count}')

    # Only prune once more backups exist than should be kept (the chart
    # default of 56 is one week of backups taken every 3 hours).
    if folder_count > number_backups_to_keep:
        for obj in all_objects:
            cleanup_backups(s3_client, bucket_name, obj, threshold_date, folder_index)
    else:
        logger.info(f'Due to low number of backups {folder_count} no backups in {folder_prefix} have been deleted' )


if __name__ == "__main__":
    main()

---
# Daily pruning of old Dremio backups: runs cleanup.py (from the
# cleanup-backups ConfigMap) against the backup location in object storage.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dremio-backup-cleanup
  namespace: {{ .Release.Namespace }}
spec:
  # Once a day at 22:00. The minute field must be fixed: "* 22 * * *" would
  # trigger the job every minute between 22:00 and 22:59.
  schedule: "0 22 * * *"
  failedJobsHistoryLimit: 1
  successfulJobsHistoryLimit: 1
  # Never run two cleanups at the same time.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: Never
          volumes:
            - name: scripts
              configMap:
                name: cleanup-backups
          containers:
            - name: cleanup-job
              image: python:3.12-alpine
              # Settings consumed by cleanup.py through os.getenv.
              env:
                - name: ENDPOINT
                  value: "{{ $.Values.distStorage.aws.backup.host }}"
                # NOTE(review): the credentials are rendered in plain text
                # into the pod spec. Prefer `valueFrom.secretKeyRef` once a
                # Secret holding them is available in the chart.
                - name: S3_ACCESS_KEY_ID
                  value: "{{ $.Values.distStorage.aws.credentials.accessKey }}"
                - name: S3_SECRET_ACCESS_KEY
                  value: "{{ $.Values.distStorage.aws.credentials.secret }}"
                - name: BUCKET_NAME
                  value: "{{ $.Values.distStorage.aws.bucketName }}"
                - name: FOLDER_PREFIX
                  value: "{{ $.Values.distStorage.aws.backup.path }}"
                - name: REGION_NAME
                  value: "{{ $.Values.distStorage.aws.backup.region }}"
                - name: NUMBER_DAYS_MAX_TO_KEEP
                  value: "{{ $.Values.distStorage.aws.backup.numberDaysMaxToKeep }}"
                - name: NUMBER_BACKUPS_TO_KEEP
                  value: "{{ $.Values.distStorage.aws.backup.numberBackupsToKeep }}"
              command: ["/bin/sh", "-c"]
              # Dependencies are installed on every run, so the job needs
              # network access to a package index.
              args:
                - "pip install -q boto3 botocore && python /tmp/python/cleanup.py"
              volumeMounts:
                - name: scripts
                  mountPath: /tmp/python
6 changes: 6 additions & 0 deletions charts/dremio_v2/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ distStorage:
bucketName: "AWS Bucket Name"
path: "/"
authentication: "metadata"
# Location of the Dremio backups and retention rules for the cleanup job.
backup:
  path: "backup/daily/"
  host: "https://s3.amazonaws.com"
  region: "eu-central-1"
  # Backups older than this many days are eligible for deletion.
  numberDaysMaxToKeep: "30"
  # Cleanup only runs once more than this many backups exist.
  # 56 represents one week of Dremio backups taken every 3 hours.
  numberBackupsToKeep: "56"
# If using accessKeySecret for authentication against S3, uncomment the lines below and use the values
# to configure the appropriate credentials.
#
Expand Down