Skip to content

Commit 34340c4

Browse files
committed
Always purge all but one incomplete backups
1 parent 3c72aa5 commit 34340c4

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

medusa/purge.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,16 @@ def main(config, max_backup_age=0, max_backup_count=0):
3838
logging.info('Listing backups for {}'.format(config.storage.fqdn))
3939
backup_index = storage.list_backup_index_blobs()
4040
backups = list(storage.list_node_backups(fqdn=config.storage.fqdn, backup_index_blobs=backup_index))
41+
42+
# split backups by completion
43+
complete_backups, incomplete_to_purge = backups_to_purge_by_completion(backups)
44+
backups_to_purge |= set(incomplete_to_purge)
45+
4146
# list all backups to purge based on date conditions
42-
backups_to_purge |= set(backups_to_purge_by_age(backups, max_backup_age))
47+
backups_to_purge |= set(backups_to_purge_by_age(complete_backups, max_backup_age))
4348
# list all backups to purge based on count conditions
44-
backups_to_purge |= set(backups_to_purge_by_count(backups, max_backup_count))
49+
backups_to_purge |= set(backups_to_purge_by_count(complete_backups, max_backup_count))
50+
4551
# purge all candidate backups
4652
object_counts = purge_backups(
4753
storage, backups_to_purge, config.storage.backup_grace_period_in_days, config.storage.fqdn
@@ -62,6 +68,17 @@ def main(config, max_backup_age=0, max_backup_count=0):
6268
sys.exit(1)
6369

6470

71+
def backups_to_purge_by_completion(backups):
    """Split backups by completion and pick the incomplete ones to purge.

    A backup is treated as complete when its ``finished`` attribute is not None.

    :param backups: iterable of backups. The incomplete backup that comes last
        in iteration order is the one that is spared, so this relies on the
        caller passing backups oldest first for "last" to mean "most recent".
    :return: tuple ``(complete, incomplete_to_purge)``: ``complete`` is the list
        of complete backups in input order, ``incomplete_to_purge`` is a set
        with every incomplete backup except the last one.
    """
    complete, incomplete = [], []
    for backup in backups:
        if backup.finished is not None:
            complete.append(backup)
        else:
            incomplete.append(backup)

    # keep the last incomplete backup because it might still be in progress;
    # slicing a list of zero or one elements leaves nothing to purge
    incomplete_to_purge = set(incomplete[:-1])

    return complete, incomplete_to_purge
80+
81+
6582
def backups_to_purge_by_age(backups, max_backup_age):
6683
if max_backup_age > 0:
6784
max_date = datetime.now() - timedelta(days=max_backup_age)

tests/purge_test.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020

2121
from medusa.config import MedusaConfig, StorageConfig, _namedtuple_from_dict
2222
from medusa.storage import Storage
23-
from medusa.purge import backups_to_purge_by_age, backups_to_purge_by_count, backups_to_purge_by_name
23+
from medusa.purge import (
24+
backups_to_purge_by_age, backups_to_purge_by_count, backups_to_purge_by_name, backups_to_purge_by_completion
25+
)
2426
from medusa.purge import filter_differential_backups, filter_files_within_gc_grace
2527

2628
from tests.storage_test import make_node_backup, make_cluster_backup, make_blob
@@ -91,6 +93,43 @@ def test_purge_backups_by_count(self):
9193
obsolete_backups = backups_to_purge_by_count(backups, 40)
9294
assert len(obsolete_backups) == 0
9395

96+
def test_purge_backups_by_completion(self):
    """Check the complete/incomplete split and that one incomplete backup is spared."""
    # Build a list of 40 bi-daily backups, making every second backup incomplete
    backups = []
    is_complete = True
    now = datetime.now()
    for i in range(0, 80, 2):
        file_time = now + timedelta(days=(i + 1) - 80)
        backups.append(make_node_backup(self.storage, str(i), file_time, differential=True, complete=is_complete))
        is_complete = not is_complete

    self.assertEqual(40, len(backups))
    complete_backup_names = {nb.name for nb in backups if nb.finished is not None}
    self.assertEqual(len(complete_backup_names), 20, "The amount of complete backups is not correct")

    # the base with all 40 backups
    complete, incomplete_to_purge = backups_to_purge_by_completion(backups)
    self.assertEqual(20, len(complete))  # every complete backup is returned
    self.assertEqual(19, len(incomplete_to_purge))  # 1 is kept because it might be in progress

    # take all complete backups, but only the incomplete ones found up to index 20
    # (incomplete backups sit at odd indexes, so that is the first half of them)
    test_backups = [
        backup for i, backup in enumerate(backups)
        if backup.finished is not None or i <= 20
    ]
    self.assertEqual(20, sum(1 for b in test_backups if b.finished is not None))
    self.assertEqual(10, sum(1 for b in test_backups if b.finished is None))

    complete, incomplete_to_purge = backups_to_purge_by_completion(test_backups)
    self.assertEqual(20, len(complete))  # every complete backup is returned
    self.assertEqual(9, len(incomplete_to_purge))  # 1 is kept because it might be in progress
132+
94133
def test_filter_differential_backups(self):
95134
backups = list()
96135
backups.append(make_node_backup(self.storage, "one", datetime.now(), differential=True))

tests/storage_test.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def test_get_table_prefix(self):
455455
self.assertEqual('prefix/localhost/data/', self.storage._get_table_prefix('prefix', 'localhost'))
456456

457457

458-
def make_node_backup(storage, name, backup_date, differential=False, fqdn="localhost"):
458+
def make_node_backup(storage, name, backup_date, differential=False, fqdn="localhost", complete=True):
459459
if differential is True:
460460
differential_blob = make_blob("localhost/{}/meta/differential".format(name), backup_date.timestamp())
461461
else:
@@ -464,9 +464,10 @@ def make_node_backup(storage, name, backup_date, differential=False, fqdn="local
464464
schema_blob = make_blob("localhost/{}/meta/schema.cql".format(name), backup_date.timestamp())
465465
manifest_blob = make_blob("localhost/{}/meta/manifest.json".format(name), backup_date.timestamp())
466466
return NodeBackup(storage=storage, fqdn=fqdn, name=str(name),
467-
differential_blob=differential_blob, manifest_blob=manifest_blob,
467+
differential_blob=differential_blob, manifest_blob=manifest_blob if complete else None,
468468
tokenmap_blob=tokenmap_blob, schema_blob=schema_blob,
469-
started_timestamp=backup_date.timestamp(), finished_timestamp=backup_date.timestamp())
469+
started_timestamp=backup_date.timestamp(),
470+
finished_timestamp=backup_date.timestamp() if complete else None)
470471

471472

472473
def make_cluster_backup(storage, name, backup_date, nodes, differential=False):

0 commit comments

Comments
 (0)