
Commit 6af21f1

Merge pull request #126 from n-studio/fix/s3-consolidate-after
2 parents b52dbd3 + d221304

2 files changed: +74 -2 lines

scripts/backups.sh

Lines changed: 69 additions & 2 deletions
@@ -43,7 +43,9 @@ function backup_monitoring() {
 function clean_s3bucket() {
   S3_BUCKET="$1"
   DEL_DAYS="$2"
-  if [[ $(s3cmd ls s3://${BUCKET} 2>&1 | grep -q 'NoSuchBucket' ) ]];then
+  CONSOLIDATE_AFTER_DAYS="$3"
+
+  if s3cmd ls s3://${S3_BUCKET} 2>&1 | grep -q 'NoSuchBucket'; then
     echo "buckets empty, no cleaning needed"
   else
     s3cmd ls s3://${S3_BUCKET} --recursive | while read -r line; do
@@ -58,6 +60,71 @@ function clean_s3bucket() {
         fi
       fi
     done
+
+    # Handle sub-daily backup consolidation for files older than CONSOLIDATE_AFTER_DAYS
+    CONSOLIDATE_AFTER=$(echo "$CONSOLIDATE_AFTER_DAYS" | awk '{print $1}')
+    if [[ ${CONSOLIDATE_AFTER} -gt 0 ]]; then
+      echo "Consolidating S3 backups older than ${CONSOLIDATE_AFTER_DAYS} (keeping 1 backup per day)" >> ${CONSOLE_LOGGING_OUTPUT}
+
+      # Find all backup files older than CONSOLIDATE_AFTER days (excluding globals.sql).
+      # Group by database + creation date (YYYY-MM-DD), keep the earliest per day.
+      declare -A files_by_db_date
+      while IFS= read -r line; do
+        createDate=$(echo "$line" | awk '{print $1}')
+        fileName=$(echo "$line" | awk '{print $4}')
+
+        # Skip globals.sql and non-backup files
+        if [[ "$fileName" == *"globals.sql"* ]] || [[ ! "$fileName" =~ \.(dmp|sql)(\.gz)? ]]; then
+          continue
+        fi
+
+        createDateSeconds=$(date -d"$createDate" +%s)
+        olderThanConsolidate=$(date -d"${CONSOLIDATE_AFTER_DAYS} ago" +%s)
+
+        if [[ $createDateSeconds -lt $olderThanConsolidate ]]; then
+          # Extract the DB and date part of the filename (everything before the time portion)
+          # Format: {DUMPPREFIX}_{DB}.{DD-Month-YYYY-HH-MM}.dmp.gz
+          filename=$(basename "$fileName")
+          db_date_key=$(echo "$filename" | sed -n 's/\(.*\)-[0-9]\{2\}-[0-9]\{2\}\.\(dmp\|sql\).*/\1/p')
+
+          if [[ -n "$db_date_key" ]]; then
+            # Keep the first file encountered for each database+date (the listing is sorted chronologically, so this is the earliest)
+            if [[ -z "${files_by_db_date[$db_date_key]}" ]]; then
+              files_by_db_date["$db_date_key"]="$createDateSeconds $fileName"
+            fi
+          fi
+        fi
+      done < <(s3cmd ls s3://${S3_BUCKET} --recursive 2>/dev/null | sort -k1,2)
+
+      # Delete every file except the one kept per database+date
+      while IFS= read -r line; do
+        createDate=$(echo "$line" | awk '{print $1}')
+        fileName=$(echo "$line" | awk '{print $4}')
+
+        # Skip globals.sql and non-backup files
+        if [[ "$fileName" == *"globals.sql"* ]] || [[ ! "$fileName" =~ \.(dmp|sql)(\.gz)? ]]; then
+          continue
+        fi
+
+        createDateSeconds=$(date -d"$createDate" +%s)
+        olderThanConsolidate=$(date -d"${CONSOLIDATE_AFTER_DAYS} ago" +%s)
+
+        if [[ $createDateSeconds -lt $olderThanConsolidate ]]; then
+          # Extract the DB and date part of the filename (everything before the time portion)
+          # Format: {DUMPPREFIX}_{DB}.{DD-Month-YYYY-HH-MM}.dmp.gz
+          filename=$(basename "$fileName")
+          db_date_key=$(echo "$filename" | sed -n 's/\(.*\)-[0-9]\{2\}-[0-9]\{2\}\.\(dmp\|sql\).*/\1/p')
+
+          if [[ -n "$db_date_key" ]] && [[ -n "${files_by_db_date[$db_date_key]:-}" ]]; then
+            kept_file=$(echo "${files_by_db_date[$db_date_key]}" | awk '{print $2}')
+            if [[ -n "$kept_file" ]] && [[ "$fileName" != "$kept_file" ]]; then
+              echo "Deleting S3 backup: $fileName (keeping only one per day per database)" >> ${CONSOLE_LOGGING_OUTPUT}
+              s3cmd del "$fileName"
+            fi
+          fi
+        fi
+      done < <(s3cmd ls s3://${S3_BUCKET} --recursive 2>/dev/null | sort -k1,2)
+    fi
   fi
 }
@@ -284,6 +351,6 @@ if [ "${REMOVE_BEFORE:-}" ]; then
   remove_files
 elif [[ ${STORAGE_BACKEND} == "S3" ]]; then
   # Credits https://shout.setfive.com/2011/12/05/deleting-files-older-than-specified-time-with-s3cmd-and-bash/
-  clean_s3bucket "${BUCKET}" "${REMOVE_BEFORE} days"
+  clean_s3bucket "${BUCKET}" "${REMOVE_BEFORE} days" "${CONSOLIDATE_AFTER:-0} days"
 fi
 fi
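
For context, the sed expression above derives the per-day grouping key by stripping the trailing -HH-MM time portion and the extension from each dump name. A minimal sketch of how that plays out, using hypothetical filenames that follow the {DUMPPREFIX}_{DB}.{DD-Month-YYYY-HH-MM}.dmp.gz convention noted in the comments (the \| alternation assumes GNU sed, consistent with the script's reliance on GNU date -d):

# Hypothetical dump names; only the -HH-MM time portion differs within a day
for f in PG_gis.14-August-2024-09-30.dmp.gz \
         PG_gis.14-August-2024-17-45.dmp.gz \
         PG_gis.15-August-2024-09-30.dmp.gz; do
  key=$(echo "$f" | sed -n 's/\(.*\)-[0-9]\{2\}-[0-9]\{2\}\.\(dmp\|sql\).*/\1/p')
  echo "$f -> $key"
done
# PG_gis.14-August-2024-09-30.dmp.gz -> PG_gis.14-August-2024
# PG_gis.14-August-2024-17-45.dmp.gz -> PG_gis.14-August-2024
# PG_gis.15-August-2024-09-30.dmp.gz -> PG_gis.15-August-2024

The two 14-August files map to the same key, so the second pass deletes the later one and keeps only the earliest backup of that day.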

scripts/start.sh

Lines changed: 5 additions & 0 deletions
@@ -104,6 +104,10 @@ if [ -z "${REMOVE_BEFORE}" ]; then
   REMOVE_BEFORE=
 fi
 
+if [ -z "${CONSOLIDATE_AFTER}" ]; then
+  CONSOLIDATE_AFTER=
+fi
+
 
 if [ -z "${PG_CONN_PARAMETERS}" ]; then
   PG_CONN_PARAMETERS="-h ${POSTGRES_HOST} -p ${POSTGRES_PORT} -U ${POSTGRES_USER}"
@@ -209,6 +213,7 @@ export POSTGRES_HOST=\"${POSTGRES_HOST}\"
 export DUMPPREFIX=\"${DUMPPREFIX}\"
 export ARCHIVE_FILENAME=\"${ARCHIVE_FILENAME}\"
 export REMOVE_BEFORE="${REMOVE_BEFORE}"
+export CONSOLIDATE_AFTER="${CONSOLIDATE_AFTER}"
 export MIN_SAVED_FILE="${MIN_SAVED_FILE}"
 export RUN_ONCE="${RUN_ONCE}"
 DB_DUMP_ENCRYPTION_PASS_PHRASE=\"${DB_DUMP_ENCRYPTION_PASS_PHRASE}\"
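
With CONSOLIDATE_AFTER defaulted in start.sh and exported into the cron environment, consolidation can be switched on per container. A hedged sketch of an invocation (image name, bucket, and the surrounding variables are illustrative placeholders, not taken from this commit):

# Keep S3 backups for 30 days; once a backup is older than 7 days,
# keep only the earliest backup per database per day.
docker run -d \
  -e STORAGE_BACKEND=S3 \
  -e BUCKET=my-backup-bucket \
  -e REMOVE_BEFORE=30 \
  -e CONSOLIDATE_AFTER=7 \
  my-pg-backup-image

Leaving CONSOLIDATE_AFTER unset preserves the old behaviour: clean_s3bucket receives "0 days" via the ${CONSOLIDATE_AFTER:-0} fallback, so the consolidation branch never runs.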
