Skip to content

Commit c097cb6

Browse files
committed
Adds optional interval support for all prune retention flags
Adds support for setting prune retention with either an int (keep n archives) or an interval (keep within). This works much like --keep-within currently does, but extends support to all retention filters. Additionally adds a generic --keep flag to take over from (or live alongside) both --keep-last and --keep-within. --keep-last is no longer an alias of --keep-secondly and now also keeps multiple archives made within the same second. Comparisons against archive timestamps now use the local timezone instead of UTC. This should give the same result in practice, but allows for easier testing with frozen local time.
1 parent 39bdfaa commit c097cb6

File tree

4 files changed

+331
-111
lines changed

4 files changed

+331
-111
lines changed

requirements.d/development.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pytest-xdist
1111
coverage[toml]
1212
pytest-cov
1313
pytest-benchmark
14+
freezegun
1415
Cython
1516
pre-commit
1617
bandit[toml]

src/borg/archiver/prune_cmd.py

Lines changed: 98 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,49 @@
11
import argparse
22
from collections import OrderedDict
3-
from datetime import datetime, timezone, timedelta
3+
from datetime import datetime, timezone
44
import logging
55
from operator import attrgetter
66
import os
7+
import itertools
78

89
from ._common import with_repository, Highlander
910
from ..archive import Archive
1011
from ..cache import Cache
1112
from ..constants import * # NOQA
12-
from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
13-
from ..helpers import archivename_validator
13+
from ..helpers import interval, int_or_interval, sig_int, archivename_validator
14+
from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error
1415
from ..manifest import Manifest
1516

1617
from ..logger import create_logger
1718

1819
logger = create_logger()
1920

2021

21-
def prune_within(archives, seconds, kept_because):
22-
target = datetime.now(timezone.utc) - timedelta(seconds=seconds)
23-
kept_counter = 0
24-
result = []
25-
for a in archives:
26-
if a.ts > target:
27-
kept_counter += 1
28-
kept_because[a.id] = ("within", kept_counter)
29-
result.append(a)
30-
return result
22+
# The *_period_func group of functions create period grouping keys to group together archives falling within a certain
23+
# period. Among archives in each of these groups, only the latest (by creation timestamp) is kept.
3124

3225

33-
def default_period_func(pattern):
26+
def unique_period_func():
    """Build a period function that assigns every archive its own group.

    Each returned function owns a private counter that starts at 0, so two
    functions created by separate calls do not share state.
    """
    sequence = itertools.count()

    def unique_values(_archive):
        """Return the next counter value, ignoring the archive passed in."""
        # A fresh key per call means no two archives ever share a period.
        return next(sequence)

    return unique_values
34+
35+
36+
def pattern_period_func(pattern):
    """Build a period function that groups archives by a strftime pattern.

    :param pattern: strftime format string applied to the archive's ``ts``.
    :return: a function taking an archive and returning the formatted string.
    """

    def inner(archive):
        """Format the archive's creation timestamp with the given pattern."""
        # compute in local timezone
        local_ts = archive.ts.astimezone()
        return local_ts.strftime(pattern)

    return inner
3943

4044

4145
def quarterly_13weekly_period_func(a):
46+
"""Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
4247
(year, week, _) = a.ts.astimezone().isocalendar() # local time
4348
if week <= 13:
4449
# Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
@@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a):
6065

6166

6267
def quarterly_3monthly_period_func(a):
68+
"""Group archives by extracting the 3-month quarter from their creation timestamp"""
6369
lt = a.ts.astimezone() # local time
6470
if lt.month <= 3:
6571
# 1-1 to 3-31
@@ -77,51 +83,77 @@ def quarterly_3monthly_period_func(a):
7783

7884
PRUNING_PATTERNS = OrderedDict(
7985
[
80-
("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
81-
("minutely", default_period_func("%Y-%m-%d %H:%M")),
82-
("hourly", default_period_func("%Y-%m-%d %H")),
83-
("daily", default_period_func("%Y-%m-%d")),
84-
("weekly", default_period_func("%G-%V")),
85-
("monthly", default_period_func("%Y-%m")),
86+
# Each archive is considered for keeping
87+
("within", unique_period_func()),
88+
("last", unique_period_func()),
89+
("keep", unique_period_func()),
90+
# Last archive (by creation timestamp) within period group is considered for keeping
91+
("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")),
92+
("minutely", pattern_period_func("%Y-%m-%d %H:%M")),
93+
("hourly", pattern_period_func("%Y-%m-%d %H")),
94+
("daily", pattern_period_func("%Y-%m-%d")),
95+
("weekly", pattern_period_func("%G-%V")),
96+
("monthly", pattern_period_func("%Y-%m")),
8697
("quarterly_13weekly", quarterly_13weekly_period_func),
8798
("quarterly_3monthly", quarterly_3monthly_period_func),
88-
("yearly", default_period_func("%Y")),
99+
("yearly", pattern_period_func("%Y")),
89100
]
90101
)
91102

92103

93-
def prune_split(archives, rule, n, kept_because=None):
94-
last = None
104+
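# Datetime cannot represent times before datetime.min; this is datetime.min made timezone-aware (UTC).
# NOTE(review): the code below does not actually add a day of margin for time zone offsets - confirm whether one was intended.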
105+
DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc)
106+
107+
108+
def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
    """Select the archives to keep for a single retention rule.

    Archives are visited newest first. Within each period (as computed by
    ``PRUNING_PATTERNS[rule]``) only the newest archive is a candidate, and
    archives already recorded in ``kept_because`` are not selected again.

    :param archives: archives to consider; each needs ``ts`` and ``id`` attributes.
    :param rule: key into ``PRUNING_PATTERNS`` naming the period function to use.
    :param n_or_interval: an ``int`` keeps at most that many archives; any other
        value is treated as an interval and subtracted from ``base_timestamp``
        (presumably a ``timedelta`` - confirm against ``int_or_interval``).
    :param base_timestamp: reference time for interval rules; unused for int rules.
    :param kept_because: dict mapping archive id to ``(rule, index)``, updated in
        place. A fresh dict is used when omitted.
    :return: list of archives kept by this rule, newest first.
    """
    # A mutable default ({}) would be shared by every call that omits the
    # argument, silently carrying kept ids over from one call to the next.
    if kept_because is None:
        kept_because = {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        n, earliest_timestamp = None, base_timestamp - n_or_interval

    def can_retain(a, keep):
        # Count rules stop once n archives are kept; interval rules stop at
        # the first archive that is not newer than the cut-off.
        if n is not None:
            return len(keep) < n
        return a.ts > earliest_timestamp

    keep = []
    if n == 0 or len(archives) == 0:
        return keep

    last = None
    period_func = PRUNING_PATTERNS[rule]
    sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
    for a in sorted_archives:
        if not can_retain(a, keep):
            break
        period = period_func(a)
        if period != last:
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))

    # Keep oldest archive if we didn't reach the target retention count
    oldest = sorted_archives[-1]
    if oldest.id not in kept_because and can_retain(oldest, keep):
        keep.append(oldest)
        kept_because[oldest.id] = (rule + "[oldest]", len(keep))

    return keep
117145

118146

119147
class PruneMixIn:
120148
@with_repository(compatibility=(Manifest.Operation.DELETE,))
121149
def do_prune(self, args, repository, manifest):
122150
"""Prune archives according to specified rules."""
123-
if not any(
124-
(
151+
if all(
152+
# Needs explicit None-check as interval arg may be 0 (Falsey)
153+
e is None for e in (
154+
args.keep,
155+
args.within,
156+
args.last,
125157
args.secondly,
126158
args.minutely,
127159
args.hourly,
@@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest):
131163
args.quarterly_13weekly,
132164
args.quarterly_3monthly,
133165
args.yearly,
134-
args.within,
135166
)
136167
):
137168
raise CommandError(
138-
'At least one of the "keep-within", "keep-last", '
169+
'At least one of the "keep", "keep-within", "keep-last", '
139170
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
140171
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
141172
'or "keep-yearly" settings must be specified.'
@@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest):
159190
# (<rulename>, <how many archives were kept by this rule so far >)
160191
kept_because = {}
161192

162-
# find archives which need to be kept because of the keep-within rule
163-
if args.within:
164-
keep += prune_within(archives, args.within, kept_because)
165-
193+
base_timestamp = datetime.now().astimezone()
166194
# find archives which need to be kept because of the various time period rules
167195
for rule in PRUNING_PATTERNS.keys():
168-
num = getattr(args, rule, None)
169-
if num is not None:
170-
keep += prune_split(archives, rule, num, kept_because)
196+
num_or_interval = getattr(args, rule, None)
197+
if num_or_interval is not None:
198+
keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because)
171199

172200
to_delete = set(archives) - set(keep)
173201
with Cache(repository, manifest, iec=args.iec) as cache:
@@ -312,81 +340,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
312340
help="keep all archives within this time interval",
313341
)
314342
subparser.add_argument(
315-
"--keep-last",
343+
"--keep-last", dest="last", type=int, action=Highlander, help="number of archives to keep"
344+
)
345+
subparser.add_argument(
346+
"--keep",
347+
dest="keep",
348+
type=int_or_interval,
349+
action=Highlander,
350+
help="number or time interval of archives to keep",
351+
)
352+
subparser.add_argument(
316353
"--keep-secondly",
317354
dest="secondly",
318-
type=int,
319-
default=0,
355+
type=int_or_interval,
320356
action=Highlander,
321-
help="number of secondly archives to keep",
357+
help="number or time interval of secondly archives to keep",
322358
)
323359
subparser.add_argument(
324360
"--keep-minutely",
325361
dest="minutely",
326-
type=int,
327-
default=0,
362+
type=int_or_interval,
328363
action=Highlander,
329-
help="number of minutely archives to keep",
364+
help="number or time interval of minutely archives to keep",
330365
)
331366
subparser.add_argument(
332367
"-H",
333368
"--keep-hourly",
334369
dest="hourly",
335-
type=int,
336-
default=0,
370+
type=int_or_interval,
337371
action=Highlander,
338-
help="number of hourly archives to keep",
372+
help="number or time interval of hourly archives to keep",
339373
)
340374
subparser.add_argument(
341375
"-d",
342376
"--keep-daily",
343377
dest="daily",
344-
type=int,
345-
default=0,
378+
type=int_or_interval,
346379
action=Highlander,
347-
help="number of daily archives to keep",
380+
help="number or time interval of daily archives to keep",
348381
)
349382
subparser.add_argument(
350383
"-w",
351384
"--keep-weekly",
352385
dest="weekly",
353-
type=int,
354-
default=0,
386+
type=int_or_interval,
355387
action=Highlander,
356-
help="number of weekly archives to keep",
388+
help="number or time interval of weekly archives to keep",
357389
)
358390
subparser.add_argument(
359391
"-m",
360392
"--keep-monthly",
361393
dest="monthly",
362-
type=int,
363-
default=0,
394+
type=int_or_interval,
364395
action=Highlander,
365-
help="number of monthly archives to keep",
396+
help="number or time interval of monthly archives to keep",
366397
)
367398
quarterly_group = subparser.add_mutually_exclusive_group()
368399
quarterly_group.add_argument(
369400
"--keep-13weekly",
370401
dest="quarterly_13weekly",
371-
type=int,
372-
default=0,
373-
help="number of quarterly archives to keep (13 week strategy)",
402+
type=int_or_interval,
403+
help="number or time interval of quarterly archives to keep (13 week strategy)",
374404
)
375405
quarterly_group.add_argument(
376406
"--keep-3monthly",
377407
dest="quarterly_3monthly",
378-
type=int,
379-
default=0,
380-
help="number of quarterly archives to keep (3 month strategy)",
408+
type=int_or_interval,
409+
help="number or time interval of quarterly archives to keep (3 month strategy)",
381410
)
382411
subparser.add_argument(
383412
"-y",
384413
"--keep-yearly",
385414
dest="yearly",
386-
type=int,
387-
default=0,
415+
type=int_or_interval,
388416
action=Highlander,
389-
help="number of yearly archives to keep",
417+
help="number or time interval of yearly archives to keep",
390418
)
391419
define_archive_filters_group(subparser, sort_by=False, first_last=False)
392420
subparser.add_argument(

src/borg/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@
142142
EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no
143143

144144
ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S"
145+
ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z"
145146
ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f"
147+
ISO_FORMAT_ZONE = ISO_FORMAT + "%z"
146148

147149
DASHES = "-" * 78
148150

0 commit comments

Comments
 (0)