Skip to content

Commit 3f3d39a

Browse files
committed
Adds optional interval support for all prune retention flags
Adds support for setting each prune retention flag to either an int (keep n archives) or an interval (keep archives within that interval). This works much like --keep-within currently does, but extends the behavior to all retention filters. Additionally, a generic --keep flag is added to take over (or live alongside) both --keep-last and --keep-within. --keep-last is no longer an alias of --keep-secondly, so it now also keeps archives made within the same second. Comparisons against archive timestamps now use the local timezone instead of UTC; the result should be the same in practice, but this allows for easier testing with frozen local time.
1 parent 44cc6c8 commit 3f3d39a

File tree

4 files changed

+331
-111
lines changed

4 files changed

+331
-111
lines changed

requirements.d/development.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ pytest
1010
pytest-xdist
1111
pytest-cov
1212
pytest-benchmark
13+
freezegun
1314
Cython
1415
pre-commit
1516
bandit[toml]

src/borg/archiver/prune_cmd.py

Lines changed: 98 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,49 @@
11
import argparse
22
from collections import OrderedDict
3-
from datetime import datetime, timezone, timedelta
3+
from datetime import datetime, timezone
44
import logging
55
from operator import attrgetter
66
import os
7+
import itertools
78

89
from ._common import with_repository, Highlander
910
from ..archive import Archive
1011
from ..cache import Cache
1112
from ..constants import * # NOQA
12-
from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
13-
from ..helpers import archivename_validator
13+
from ..helpers import interval, int_or_interval, sig_int, archivename_validator
14+
from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error
1415
from ..manifest import Manifest
1516

1617
from ..logger import create_logger
1718

1819
logger = create_logger()
1920

2021

21-
def prune_within(archives, seconds, kept_because):
22-
target = datetime.now(timezone.utc) - timedelta(seconds=seconds)
23-
kept_counter = 0
24-
result = []
25-
for a in archives:
26-
if a.ts > target:
27-
kept_counter += 1
28-
kept_because[a.id] = ("within", kept_counter)
29-
result.append(a)
30-
return result
22+
# The *_period_func group of functions create period grouping keys to group together archives falling within a certain
23+
# period. Among archives in each of these groups, only the latest (by creation timestamp) is kept.
3124

3225

33-
def default_period_func(pattern):
26+
def unique_period_func():
27+
counter = itertools.count()
28+
29+
def unique_values(_a):
30+
"""Group archives by an incrementing counter, practically making each archive a group of 1"""
31+
return next(counter)
32+
33+
return unique_values
34+
35+
36+
def pattern_period_func(pattern):
3437
def inner(a):
38+
"""Group archives by extracting given strftime-pattern from their creation timestamp"""
3539
# compute in local timezone
3640
return a.ts.astimezone().strftime(pattern)
3741

3842
return inner
3943

4044

4145
def quarterly_13weekly_period_func(a):
46+
"""Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
4247
(year, week, _) = a.ts.astimezone().isocalendar() # local time
4348
if week <= 13:
4449
# Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
@@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a):
6065

6166

6267
def quarterly_3monthly_period_func(a):
68+
"""Group archives by extracting the 3-month quarter from their creation timestamp"""
6369
lt = a.ts.astimezone() # local time
6470
if lt.month <= 3:
6571
# 1-1 to 3-31
@@ -77,51 +83,77 @@ def quarterly_3monthly_period_func(a):
7783

7884
PRUNING_PATTERNS = OrderedDict(
7985
[
80-
("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
81-
("minutely", default_period_func("%Y-%m-%d %H:%M")),
82-
("hourly", default_period_func("%Y-%m-%d %H")),
83-
("daily", default_period_func("%Y-%m-%d")),
84-
("weekly", default_period_func("%G-%V")),
85-
("monthly", default_period_func("%Y-%m")),
86+
# Each archive is considered for keeping
87+
("within", unique_period_func()),
88+
("last", unique_period_func()),
89+
("keep", unique_period_func()),
90+
# Last archive (by creation timestamp) within each period group is considered for keeping
91+
("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")),
92+
("minutely", pattern_period_func("%Y-%m-%d %H:%M")),
93+
("hourly", pattern_period_func("%Y-%m-%d %H")),
94+
("daily", pattern_period_func("%Y-%m-%d")),
95+
("weekly", pattern_period_func("%G-%V")),
96+
("monthly", pattern_period_func("%Y-%m")),
8697
("quarterly_13weekly", quarterly_13weekly_period_func),
8798
("quarterly_3monthly", quarterly_3monthly_period_func),
88-
("yearly", default_period_func("%Y")),
99+
("yearly", pattern_period_func("%Y")),
89100
]
90101
)
91102

92103

93-
def prune_split(archives, rule, n, kept_because=None):
94-
last = None
104+
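# Datetime cannot represent times before datetime.min, so a day is added to allow for time zone offset. NOTE(review): the constant below is datetime.min with a UTC tzinfo and no day added - confirm which is intended.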
105+
DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc)
106+
107+
108+
def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because={}):
109+
if isinstance(n_or_interval, int):
110+
n, earliest_timestamp = n_or_interval, None
111+
else:
112+
n, earliest_timestamp = None, base_timestamp - n_or_interval
113+
114+
def can_retain(a, keep):
115+
if n is not None:
116+
return len(keep) < n
117+
else:
118+
return a.ts > earliest_timestamp
119+
95120
keep = []
96-
period_func = PRUNING_PATTERNS[rule]
97-
if kept_because is None:
98-
kept_because = {}
99-
if n == 0:
121+
if n == 0 or len(archives) == 0:
100122
return keep
101123

102124
a = None
103-
for a in sorted(archives, key=attrgetter("ts"), reverse=True):
125+
last = None
126+
period_func = PRUNING_PATTERNS[rule]
127+
sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
128+
for a in sorted_archives:
129+
if not can_retain(a, keep):
130+
break
104131
period = period_func(a)
105132
if period != last:
106133
last = period
107134
if a.id not in kept_because:
108135
keep.append(a)
109136
kept_because[a.id] = (rule, len(keep))
110-
if len(keep) == n:
111-
break
137+
112138
# Keep oldest archive if we didn't reach the target retention count
113-
if a is not None and len(keep) < n and a.id not in kept_because:
139+
a = sorted_archives[-1]
140+
if a is not None and a.id not in kept_because and can_retain(a, keep):
114141
keep.append(a)
115142
kept_because[a.id] = (rule + "[oldest]", len(keep))
143+
116144
return keep
117145

118146

119147
class PruneMixIn:
120148
@with_repository(compatibility=(Manifest.Operation.DELETE,))
121149
def do_prune(self, args, repository, manifest):
122150
"""Prune repository archives according to specified rules"""
123-
if not any(
124-
(
151+
if all(
152+
e is None
153+
for e in (
154+
args.keep,
155+
args.within,
156+
args.last,
125157
args.secondly,
126158
args.minutely,
127159
args.hourly,
@@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest):
131163
args.quarterly_13weekly,
132164
args.quarterly_3monthly,
133165
args.yearly,
134-
args.within,
135166
)
136167
):
137168
raise CommandError(
138-
'At least one of the "keep-within", "keep-last", '
169+
'At least one of the "keep", "keep-within", "keep-last", '
139170
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
140171
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
141172
'or "keep-yearly" settings must be specified.'
@@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest):
159190
# (<rulename>, <how many archives were kept by this rule so far >)
160191
kept_because = {}
161192

162-
# find archives which need to be kept because of the keep-within rule
163-
if args.within:
164-
keep += prune_within(archives, args.within, kept_because)
165-
193+
base_timestamp = datetime.now().astimezone()
166194
# find archives which need to be kept because of the various time period rules
167195
for rule in PRUNING_PATTERNS.keys():
168-
num = getattr(args, rule, None)
169-
if num is not None:
170-
keep += prune_split(archives, rule, num, kept_because)
196+
num_or_interval = getattr(args, rule, None)
197+
if num_or_interval is not None:
198+
keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because)
171199

172200
to_delete = set(archives) - set(keep)
173201
with Cache(repository, manifest, iec=args.iec) as cache:
@@ -310,81 +338,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
310338
help="keep all archives within this time interval",
311339
)
312340
subparser.add_argument(
313-
"--keep-last",
341+
"--keep-last", dest="last", type=int, action=Highlander, help="number of archives to keep"
342+
)
343+
subparser.add_argument(
344+
"--keep",
345+
dest="keep",
346+
type=int_or_interval,
347+
action=Highlander,
348+
help="number or time interval of archives to keep",
349+
)
350+
subparser.add_argument(
314351
"--keep-secondly",
315352
dest="secondly",
316-
type=int,
317-
default=0,
353+
type=int_or_interval,
318354
action=Highlander,
319-
help="number of secondly archives to keep",
355+
help="number or time interval of secondly archives to keep",
320356
)
321357
subparser.add_argument(
322358
"--keep-minutely",
323359
dest="minutely",
324-
type=int,
325-
default=0,
360+
type=int_or_interval,
326361
action=Highlander,
327-
help="number of minutely archives to keep",
362+
help="number or time interval of minutely archives to keep",
328363
)
329364
subparser.add_argument(
330365
"-H",
331366
"--keep-hourly",
332367
dest="hourly",
333-
type=int,
334-
default=0,
368+
type=int_or_interval,
335369
action=Highlander,
336-
help="number of hourly archives to keep",
370+
help="number or time interval of hourly archives to keep",
337371
)
338372
subparser.add_argument(
339373
"-d",
340374
"--keep-daily",
341375
dest="daily",
342-
type=int,
343-
default=0,
376+
type=int_or_interval,
344377
action=Highlander,
345-
help="number of daily archives to keep",
378+
help="number or time interval of daily archives to keep",
346379
)
347380
subparser.add_argument(
348381
"-w",
349382
"--keep-weekly",
350383
dest="weekly",
351-
type=int,
352-
default=0,
384+
type=int_or_interval,
353385
action=Highlander,
354-
help="number of weekly archives to keep",
386+
help="number or time interval of weekly archives to keep",
355387
)
356388
subparser.add_argument(
357389
"-m",
358390
"--keep-monthly",
359391
dest="monthly",
360-
type=int,
361-
default=0,
392+
type=int_or_interval,
362393
action=Highlander,
363-
help="number of monthly archives to keep",
394+
help="number or time interval of monthly archives to keep",
364395
)
365396
quarterly_group = subparser.add_mutually_exclusive_group()
366397
quarterly_group.add_argument(
367398
"--keep-13weekly",
368399
dest="quarterly_13weekly",
369-
type=int,
370-
default=0,
371-
help="number of quarterly archives to keep (13 week strategy)",
400+
type=int_or_interval,
401+
help="number or time interval of quarterly archives to keep (13 week strategy)",
372402
)
373403
quarterly_group.add_argument(
374404
"--keep-3monthly",
375405
dest="quarterly_3monthly",
376-
type=int,
377-
default=0,
378-
help="number of quarterly archives to keep (3 month strategy)",
406+
type=int_or_interval,
407+
help="number or time interval of quarterly archives to keep (3 month strategy)",
379408
)
380409
subparser.add_argument(
381410
"-y",
382411
"--keep-yearly",
383412
dest="yearly",
384-
type=int,
385-
default=0,
413+
type=int_or_interval,
386414
action=Highlander,
387-
help="number of yearly archives to keep",
415+
help="number or time interval of yearly archives to keep",
388416
)
389417
define_archive_filters_group(subparser, sort_by=False, first_last=False)
390418
subparser.add_argument(

src/borg/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@
137137
EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no
138138

139139
ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S"
140+
ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z"
140141
ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f"
142+
ISO_FORMAT_ZONE = ISO_FORMAT + "%z"
141143

142144
DASHES = "-" * 78
143145

0 commit comments

Comments
 (0)