-
Notifications
You must be signed in to change notification settings - Fork 189
Expand file tree
/
Copy pathcleanup.yaml
More file actions
150 lines (136 loc) · 5.7 KB
/
cleanup.yaml
File metadata and controls
150 lines (136 loc) · 5.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
---
# ConfigMap that carries the Python cleanup script. The dremio-backup-cleanup
# CronJob defined later in this file mounts it as a volume at /tmp/python and
# runs /tmp/python/cleanup.py.
apiVersion: v1
kind: ConfigMap
metadata:
name: cleanup-backups
data:
# Literal block scalar: the lines that follow are the script's source text.
cleanup.py: |
import os
import boto3
import logging
from datetime import datetime, timedelta
from botocore.client import Config
# Module-wide logger: getLogger() without a name returns the root logger,
# which is then configured to emit INFO and above.
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)
def create_s3_client(endpoint, access_key, secret_key, region):
    """Build a boto3 S3 client for an object-storage endpoint.

    Args:
        endpoint: Value passed to boto3 as ``endpoint_url``.
        access_key: Value passed as ``aws_access_key_id``.
        secret_key: Value passed as ``aws_secret_access_key``.
        region: Value passed as ``region_name``.

    Returns:
        The client created from a fresh boto3 session, configured with the
        ``s3v4`` signature version.

    Raises:
        Exception: Any error raised while building the client is logged and
            re-raised unchanged.
    """
    try:
        return boto3.session.Session().client(
            's3',
            region_name=region,
            endpoint_url=endpoint,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            config=Config(signature_version='s3v4'),
        )
    except Exception as e:
        logger.error(f"Failed to create S3 client: {e}")
        raise
def generate_s3_keys(bucket, s3_client, prefix='/', delimiter='/', start_after=''):
    """Lazily yield the key of every object listed under ``prefix``.

    Args:
        bucket: Bucket name passed to the ``list_objects_v2`` paginator.
        s3_client: Client object providing ``get_paginator``.
        prefix: Key prefix to list. Leading ``delimiter`` characters are
            stripped before it is sent.
        delimiter: Only used to strip the front of ``prefix``; it is not
            forwarded to the listing call.
        start_after: Key to start listing after. When empty, the stripped
            prefix is used instead.

    Yields:
        The ``Key`` of each entry in every page's ``Contents`` list; pages
        without ``Contents`` contribute nothing.

    Raises:
        Exception: Any error raised while listing is logged and re-raised.
    """
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        key_prefix = prefix.lstrip(delimiter)
        first_after = start_after if start_after else key_prefix
        pages = paginator.paginate(
            Bucket=bucket, Prefix=key_prefix, StartAfter=first_after
        )
        for page in pages:
            yield from (entry['Key'] for entry in page.get('Contents', []))
    except Exception as e:
        logger.error(f"Failed to list objects: {e}")
        raise
def cleanup_backups(s3_client, bucket, obj, threshold_date):
    """Delete one object if its backup folder is dated before the threshold.

    The backup folder is the third ``/``-separated segment of the key, and
    its date is the second-to-last ``_``-separated token of that folder
    name, parsed as ``YYYY-MM-DD``.

    Args:
        s3_client: Client object providing ``delete_object``.
        bucket: Bucket that holds the object.
        obj: Full object key.
        threshold_date: ``datetime``; objects whose folder date is strictly
            earlier than this are deleted.

    Returns:
        None. Keys that do not match the expected layout (IndexError or
        ValueError while parsing) are logged as a warning and left alone.
    """
    try:
        path_parts = obj.split('/')
        backup_dir = path_parts[2]
        name_tokens = backup_dir.split('_')
        created_on = datetime.strptime(name_tokens[-2], '%Y-%m-%d')
        if not (created_on < threshold_date):
            # Recent enough: keep it.
            return
        s3_client.delete_object(Bucket=bucket, Key=obj)
        logger.info(f"Deleted folder: {obj}")
    except (IndexError, ValueError) as e:
        logger.warning(f"Skipping invalid folder name: {obj} {e}")
def _read_positive_int(name):
    """Return environment variable ``name`` as a positive int, or None after logging why not."""
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        logger.error(f"Missing required environment variable: {name}")
        return None
    try:
        value = int(raw)
    except ValueError:
        logger.error(f"Environment variable {name} must be an integer, got {raw!r}")
        return None
    if value <= 0:
        # A zero or negative retention would move the threshold to now or
        # the future and make every backup eligible for deletion.
        logger.error(f"Environment variable {name} must be greater than zero, got {value}")
        return None
    return value


def main():
    """Prune old backup objects from the configured bucket.

    Reads its settings from the environment (ENDPOINT, S3_ACCESS_KEY_ID,
    S3_SECRET_ACCESS_KEY, BUCKET_NAME, FOLDER_PREFIX, REGION_NAME,
    NUMBER_DAYS_MAX_TO_KEEP, NUMBER_BACKUPS_TO_KEEP). If any setting is
    missing or invalid, an error is logged and the function returns without
    contacting the object store.

    Otherwise it lists every key under FOLDER_PREFIX and counts distinct
    backup folders (third ``/``-separated segment of the key). Only when
    that count exceeds NUMBER_BACKUPS_TO_KEEP is each key handed to
    ``cleanup_backups`` with a threshold of now minus
    NUMBER_DAYS_MAX_TO_KEEP days.

    NOTE(review): once the folder count exceeds NUMBER_BACKUPS_TO_KEEP,
    every object older than the threshold is removed, so fewer than
    NUMBER_BACKUPS_TO_KEEP backups can remain afterwards - confirm that
    this is the intended retention policy.
    """
    # Retrieve environment variables
    storage_endpoint = os.getenv('ENDPOINT')
    storage_access_key = os.getenv('S3_ACCESS_KEY_ID')
    storage_secret_key = os.getenv('S3_SECRET_ACCESS_KEY')
    bucket_name = os.getenv('BUCKET_NAME')
    folder_prefix = os.getenv('FOLDER_PREFIX')
    region_name = os.getenv('REGION_NAME')

    # Validate the string settings first: an unset or empty value is falsy.
    if not all([storage_endpoint, storage_access_key, storage_secret_key,
                bucket_name, folder_prefix, region_name]):
        logger.error("Missing one or more required environment variables")
        return

    # Parse the numeric settings only after checking they are present, so a
    # missing or malformed value is reported instead of raising from int().
    number_days_max_to_keep = _read_positive_int('NUMBER_DAYS_MAX_TO_KEEP')
    number_backups_to_keep = _read_positive_int('NUMBER_BACKUPS_TO_KEEP')
    if number_days_max_to_keep is None or number_backups_to_keep is None:
        return

    # Calculate the threshold date
    threshold_date = datetime.now() - timedelta(days=number_days_max_to_keep)

    # Create S3 client
    s3_client = create_s3_client(storage_endpoint, storage_access_key, storage_secret_key, region_name)

    # Retrieve and process objects
    all_objects = []
    unique_folders = set()
    for obj in generate_s3_keys(bucket_name, s3_client, prefix=folder_prefix):
        segments = obj.split('/')
        # The backup folder is the third path segment. Keys that are too
        # short, or whose third segment is empty, are not backups: skip them
        # rather than crashing or inflating the backup count.
        if len(segments) < 3 or not segments[2]:
            logger.warning(f"Skipping key outside a backup folder: {obj}")
            continue
        all_objects.append(obj)
        unique_folders.add(segments[2])

    folder_count = len(unique_folders)
    logger.info(f'Total number of backups in {folder_prefix}: {folder_count}')

    # Example sizing: 56 backups is one week of Dremio backups taken every
    # 3 hours.
    if folder_count > number_backups_to_keep:
        for obj in all_objects:
            cleanup_backups(s3_client, bucket_name, obj, threshold_date)
    else:
        logger.info(f'Due to low number of backups {folder_count} no backups in {folder_prefix} have been deleted')
# Run the cleanup only when this file is executed as a script.
if __name__ == '__main__':
    main()
---
# CronJob that installs boto3 in a python:3.12-alpine container and runs
# cleanup.py, mounted from the cleanup-backups ConfigMap at /tmp/python.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dremio-backup-cleanup
  namespace: "{{ .Release.Namespace }}"
spec:
  # Once a day at 22:00. The minute field must be pinned: "* 22 * * *" starts
  # a new job every minute from 22:00 through 22:59.
  schedule: "0 22 * * *"
  failedJobsHistoryLimit: 1
  successfulJobsHistoryLimit: 1
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: cleanup-job
              image: python:3.12-alpine
              # Settings read by cleanup.py through os.getenv.
              # NOTE(review): the access key and secret are rendered in
              # plain text into the pod spec; consider sourcing them from a
              # Secret via secretKeyRef.
              env:
                - name: ENDPOINT
                  value: "{{ $.Values.distStorage.aws.backup.host }}"
                - name: S3_ACCESS_KEY_ID
                  value: "{{ $.Values.distStorage.aws.credentials.accessKey }}"
                - name: S3_SECRET_ACCESS_KEY
                  value: "{{ $.Values.distStorage.aws.credentials.secret }}"
                - name: BUCKET_NAME
                  value: "{{ $.Values.distStorage.aws.bucketName }}"
                - name: FOLDER_PREFIX
                  value: "{{ $.Values.distStorage.aws.backup.path }}"
                - name: REGION_NAME
                  value: "{{ $.Values.distStorage.aws.backup.region }}"
                - name: NUMBER_DAYS_MAX_TO_KEEP
                  value: "{{ $.Values.distStorage.aws.backup.numberDaysMaxToKeep }}"
                - name: NUMBER_BACKUPS_TO_KEEP
                  value: "{{ $.Values.distStorage.aws.backup.numberBackupsToKeep }}"
              command: ["/bin/sh", "-c"]
              args:
                - "pip install -q boto3 botocore && python /tmp/python/cleanup.py"
              volumeMounts:
                - name: scripts
                  mountPath: /tmp/python
          restartPolicy: Never
          volumes:
            - name: scripts
              configMap:
                name: cleanup-backups