@@ -43,7 +43,9 @@ function backup_monitoring() {
#######################################
# Prune aged backup dumps from an S3 bucket and, optionally, consolidate
# sub-daily backups older than a threshold down to one backup per database
# per calendar day (the earliest of each day is kept). globals.sql dumps
# and non-backup objects are never touched by consolidation.
# Globals:
#   CONSOLE_LOGGING_OUTPUT - log sink appended to for progress messages
# Arguments:
#   $1 - bucket name (without the s3:// prefix)
#   $2 - deletion age threshold in `date -d` syntax, e.g. "30 days"
#   $3 - consolidation age threshold, e.g. "7 days"; "0 days" disables it
# Outputs:
#   status messages to stdout and ${CONSOLE_LOGGING_OUTPUT}
#######################################
function clean_s3bucket() {
  S3_BUCKET="$1"
  DEL_DAYS="$2"
  CONSOLIDATE_AFTER_DAYS="$3"

  # grep -q in a plain condition: exit status decides, no output captured.
  if s3cmd ls "s3://${S3_BUCKET}" 2>&1 | grep -q 'NoSuchBucket'; then
    echo "buckets empty , no cleaning needed"
    return 0
  fi

  # --- Age-based deletion: remove every object older than DEL_DAYS. ---
  # NOTE(review): this loop body was elided in the diff hunk; reconstructed
  # from the visible closers and the credited setfive pattern — confirm
  # against the full file before merging.
  # Cut-off computed once instead of once per listing line.
  olderThan=$(date -d "${DEL_DAYS} ago" +%s)
  s3cmd ls "s3://${S3_BUCKET}" --recursive | while read -r line; do
    createDate=$(echo "$line" | awk '{print $1}')
    fileName=$(echo "$line" | awk '{print $4}')
    createDateSeconds=$(date -d "$createDate" +%s)
    if [[ ${createDateSeconds} -lt ${olderThan} ]]; then
      if [[ -n "$fileName" ]]; then
        s3cmd del "$fileName"
      fi
    fi
  done

  # --- Sub-daily consolidation for objects older than CONSOLIDATE_AFTER_DAYS. ---
  # Strip the trailing " days" to get a bare integer for the enable check.
  CONSOLIDATE_AFTER=$(echo "$CONSOLIDATE_AFTER_DAYS" | awk '{print $1}')
  if [[ ${CONSOLIDATE_AFTER} -gt 0 ]]; then
    echo "Consolidating S3 backups older than ${CONSOLIDATE_AFTER_DAYS} (keeping 1 backup per day)" >> ${CONSOLE_LOGGING_OUTPUT}

    # Cut-off hoisted out of both passes (was recomputed per listing line).
    olderThanConsolidate=$(date -d "${CONSOLIDATE_AFTER_DAYS} ago" +%s)

    declare -A files_by_db_date=()

    # Pass 1: record the earliest backup per {prefix}_{db}.{date} key.
    # Process substitution (not a pipe) so the array survives the loop.
    while IFS= read -r line; do
      createDate=$(echo "$line" | awk '{print $1}')
      fileName=$(echo "$line" | awk '{print $4}')

      # Skip globals.sql and anything that is not a .dmp/.sql(.gz) dump.
      [[ "$fileName" == *"globals.sql"* ]] && continue
      [[ "$fileName" =~ \.(dmp|sql)(\.gz)? ]] || continue

      createDateSeconds=$(date -d "$createDate" +%s)
      [[ ${createDateSeconds} -lt ${olderThanConsolidate} ]] || continue

      # Drop the -HH-MM time suffix so the key is database + calendar day:
      # {DUMPPREFIX}_{DB}.{DD-Month-YYYY-HH-MM}.dmp.gz -> {DUMPPREFIX}_{DB}.{DD-Month-YYYY}
      filename=$(basename "$fileName")
      db_date_key=$(echo "$filename" | sed -n 's/\(.*\)-[0-9]\{2\}-[0-9]\{2\}\.\(dmp\|sql\).*/\1/p')

      if [[ -n "$db_date_key" && -z "${files_by_db_date[$db_date_key]:-}" ]]; then
        # Listing is sorted chronologically, so the first hit per key is
        # the earliest backup of that day — the one we keep.
        files_by_db_date["$db_date_key"]="$createDateSeconds $fileName"
      fi
    done < <(s3cmd ls "s3://${S3_BUCKET}" --recursive 2>/dev/null | sort -k1,2)

    # Pass 2: delete every consolidation candidate that is not the keeper.
    while IFS= read -r line; do
      createDate=$(echo "$line" | awk '{print $1}')
      fileName=$(echo "$line" | awk '{print $4}')

      [[ "$fileName" == *"globals.sql"* ]] && continue
      [[ "$fileName" =~ \.(dmp|sql)(\.gz)? ]] || continue

      createDateSeconds=$(date -d "$createDate" +%s)
      [[ ${createDateSeconds} -lt ${olderThanConsolidate} ]] || continue

      filename=$(basename "$fileName")
      db_date_key=$(echo "$filename" | sed -n 's/\(.*\)-[0-9]\{2\}-[0-9]\{2\}\.\(dmp\|sql\).*/\1/p')

      if [[ -n "$db_date_key" && -n "${files_by_db_date[$db_date_key]:-}" ]]; then
        kept_file=$(echo "${files_by_db_date[$db_date_key]}" | awk '{print $2}')
        if [[ -n "$kept_file" && "$fileName" != "$kept_file" ]]; then
          echo "Deleting S3 backup: $fileName (keeping only one per day per database)" >> ${CONSOLE_LOGGING_OUTPUT}
          s3cmd del "$fileName"
        fi
      fi
    done < <(s3cmd ls "s3://${S3_BUCKET}" --recursive 2>/dev/null | sort -k1,2)
  fi
}
63130
# Retention entry point: once REMOVE_BEFORE is set, prune aged backups from
# whichever storage backend is active. The S3 arm now also passes the
# consolidation window (defaults to "0 days" = disabled when
# CONSOLIDATE_AFTER is unset). Fixed: ${CONSOLIDATE_AFTER :- 0} had spaces
# inside the parameter expansion (a bad-substitution error in bash) and
# stray padding inside the quoted "... days" argument.
# NOTE(review): the opening of the inner STORAGE_BACKEND branch sits outside
# the visible diff hunk; the FILE arm condition is inferred from the elif —
# confirm against the full file.
if [ "${REMOVE_BEFORE:-}" ]; then
  if [[ ${STORAGE_BACKEND} == "FILE" ]]; then
    remove_files
  elif [[ ${STORAGE_BACKEND} == "S3" ]]; then
    # Credits https://shout.setfive.com/2011/12/05/deleting-files-older-than-specified-time-with-s3cmd-and-bash/
    clean_s3bucket "${BUCKET}" "${REMOVE_BEFORE} days" "${CONSOLIDATE_AFTER:-0} days"
  fi
fi
0 commit comments