Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 49 additions & 30 deletions postgres-appliance/scripts/postgres_backup.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

# Make a pipeline's exit status reflect the last failing stage, so failures
# inside pipes (e.g. wal-g | jq below) are not masked by the final command.
# NOTE(review): -e is intentionally absent — the backup retry loop inspects
# exit codes manually and must not abort on the first failed attempt.
set -o pipefail

function log
{
echo "$(date "+%Y-%m-%d %H:%M:%S.%3N") - $0 - $*"
Expand All @@ -9,9 +11,9 @@ function log

log "I was called as: $0 $*"


# $1: path to the PostgreSQL data directory handed to wal-g backup-push.
readonly PGDATA=$1

# Number of base backups to retain (floor of 2 enforced later) and number
# of days of backups to retain; DAYS_TO_RETAIN defaults to the backup count
# so existing deployments that only set BACKUP_NUM_TO_RETAIN keep working.
BACKUP_NUM_TO_RETAIN=${BACKUP_NUM_TO_RETAIN:-2}
DAYS_TO_RETAIN=${DAYS_TO_RETAIN:-$BACKUP_NUM_TO_RETAIN}

IN_RECOVERY=$(psql -tXqAc "select pg_catalog.pg_is_in_recovery()")
readonly IN_RECOVERY
Expand All @@ -23,46 +25,63 @@ else
log "ERROR: Recovery state unknown: $IN_RECOVERY" && exit 1
fi

# Compression method precedence: new WAL-G variable, then the legacy WAL-E
# variable, then any WALG_COMPRESSION_METHOD already present in the
# environment (so a pre-set value is not clobbered with an empty string).
export WALG_COMPRESSION_METHOD="${WALG_BACKUP_COMPRESSION_METHOD:-${WALE_BACKUP_COMPRESSION_METHOD:-$WALG_COMPRESSION_METHOD}}"
# Connect to the local server via its Unix socket directory.
export PGHOST=/run/postgresql

# Exponential backoff configuration for the retry loop below.
# These are constants — mark them readonly so later code cannot reassign.
readonly BASE_DELAY=900   # Starting delay: 15 minutes
readonly MAX_RETRIES=3    # Total number of retries (excluding initial attempt)

# Push a new base backup, retrying with exponential backoff.
# Total attempts: 1 initial + MAX_RETRIES retries.
for ((i=0; i<=MAX_RETRIES; i++)); do
    log "Producing a new backup (Attempt $((i+1)) of $((MAX_RETRIES+1)))..."

    # Run at reduced CPU priority so the push does not starve the server.
    nice -n 5 wal-g backup-push "$PGDATA"
    EXIT_CODE=$?

    if [[ $EXIT_CODE -eq 0 ]]; then
        log "Backup successful on attempt $((i+1))."
        break
    fi

    log "Backup failed with exit code $EXIT_CODE."

    # Out of retries: propagate the last wal-g exit code to the caller.
    if [[ $i -eq $MAX_RETRIES ]]; then
        log "ERROR: All backup attempts failed. Exiting."
        exit "$EXIT_CODE"
    fi

    # Exponential backoff: BASE_DELAY * 2^attempt (15m, 30m, 60m, ...).
    WAIT_TIME=$(( BASE_DELAY * (2**i) ))
    log "Retrying in $((WAIT_TIME/60)) minutes..."
    sleep "$WAIT_TIME"
done

# Enforce retention floors: never keep fewer than 2 base backups and never
# delete backups younger than 2 days, regardless of the environment's request.
[[ "$BACKUP_NUM_TO_RETAIN" -lt 2 ]] && BACKUP_NUM_TO_RETAIN=2
[[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2

# Silence wal-g / S3 debug logging for the list and delete calls below so
# their stdout stays machine-parseable.
unset WALG_LOG_LEVEL
unset S3_LOG_LEVEL

# Choose the deletion boundary from wal-g's JSON backup list:
#   * sort newest-first,
#   * skip past the BACKUP_NUM_TO_RETAIN newest — the slice starts at index
#     min_count-1 because 'wal-g delete before' keeps the named backup itself,
#   * of the remainder, pick the newest backup at least DAYS_TO_RETAIN days old.
# Yields "" when no backup qualifies.
# NOTE(review): assumes each entry's .time is ISO-8601 UTC ending in 'Z'
# (optional fractional seconds stripped by sub()); a numeric offset such as
# +00:00 would not be stripped and fromdateiso8601 would fail — confirm
# against the wal-g version in use. Also assumes the field is .backup_name.
TARGET_BACKUP=$(wal-g backup-list --json 2>/dev/null | jq -r \
--argjson min_count "$BACKUP_NUM_TO_RETAIN" \
--argjson days "$DAYS_TO_RETAIN" \
'sort_by(.time) | reverse
| .[$min_count - 1:]
| map(select((now - (.time | sub("\\.[0-9]+Z$"; "Z") | fromdateiso8601)) >= ($days * 86400)))
| first
| .backup_name // ""')

# Nothing is old enough to delete — exit successfully without touching storage.
if [[ -z "$TARGET_BACKUP" ]]; then
    log "No backups older than $DAYS_TO_RETAIN days found, not deleting any"
    exit 0
fi

# TARGET_BACKUP is non-empty here (the guard above already exited), so the
# former re-check and its unreachable else branch are dropped. 'delete
# before' removes everything strictly older than the named backup.
log "Found target boundary backup: $TARGET_BACKUP"
wal-g delete before FIND_FULL "$TARGET_BACKUP" --confirm