11import argparse
22from collections import OrderedDict
3- from datetime import datetime , timezone , timedelta
3+ from datetime import datetime , timezone
44import logging
55from operator import attrgetter
66import os
7+ import itertools
78
89from ._common import with_repository , Highlander
910from ..archive import Archive
1011from ..cache import Cache
1112from ..constants import * # NOQA
12- from ..helpers import ArchiveFormatter , interval , sig_int , ProgressIndicatorPercent , CommandError , Error
13- from ..helpers import archivename_validator
13+ from ..helpers import interval , int_or_interval , sig_int , archivename_validator
14+ from ..helpers import ArchiveFormatter , ProgressIndicatorPercent , CommandError , Error
1415from ..manifest import Manifest
1516
1617from ..logger import create_logger
1718
1819logger = create_logger ()
1920
2021
21- def prune_within (archives , seconds , kept_because ):
22- target = datetime .now (timezone .utc ) - timedelta (seconds = seconds )
23- kept_counter = 0
24- result = []
25- for a in archives :
26- if a .ts > target :
27- kept_counter += 1
28- kept_because [a .id ] = ("within" , kept_counter )
29- result .append (a )
30- return result
22+ # The *_period_func functions build period grouping keys: archives falling within the same period get the same key.
23+ # Among the archives in each such group, only the latest (by creation timestamp) is kept.
3124
3225
33- def default_period_func (pattern ):
def unique_period_func():
    """Create a period function that assigns a fresh key to every archive it sees.

    Each returned function owns its own counter, so separate rules do not share state.
    """
    sequence = itertools.count()

    def unique_values(_a):
        """Return the next counter value, ignoring the archive, so no two archives share a group"""
        key = next(sequence)
        return key

    return unique_values
34+
35+
def pattern_period_func(pattern):
    """Create a period function that keys archives by a strftime rendering of their timestamp.

    :param pattern: strftime format string; archives whose formatted timestamps are equal share a group.
    :return: a function taking an archive (anything with a ``ts`` datetime attribute) and returning the key.
    """

    def inner(a):
        """Format the archive's creation timestamp, converted to local time, with the given pattern"""
        local_ts = a.ts.astimezone()  # no argument: convert to the system's local timezone
        return local_ts.strftime(pattern)

    return inner
3943
4044
4145def quarterly_13weekly_period_func (a ):
46+ """Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
4247 (year , week , _ ) = a .ts .astimezone ().isocalendar () # local time
4348 if week <= 13 :
4449 # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
@@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a):
6065
6166
6267def quarterly_3monthly_period_func (a ):
68+ """Group archives by extracting the 3-month quarter from their creation timestamp"""
6369 lt = a .ts .astimezone () # local time
6470 if lt .month <= 3 :
6571 # 1-1 to 3-31
@@ -77,51 +83,77 @@ def quarterly_3monthly_period_func(a):
7783
# Maps each pruning rule name to the function that computes an archive's period grouping key.
# The insertion order matters: do_prune applies the rules in the order they are listed here.
PRUNING_PATTERNS = OrderedDict(
    [
        # Each archive is considered for keeping
        ("within", unique_period_func()),
        ("last", unique_period_func()),
        ("keep", unique_period_func()),
        # Last archive (by creation timestamp) within period group is considered for keeping
        ("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")),
        ("minutely", pattern_period_func("%Y-%m-%d %H:%M")),
        ("hourly", pattern_period_func("%Y-%m-%d %H")),
        ("daily", pattern_period_func("%Y-%m-%d")),
        ("weekly", pattern_period_func("%G-%V")),  # ISO 8601 year and week number
        ("monthly", pattern_period_func("%Y-%m")),
        ("quarterly_13weekly", quarterly_13weekly_period_func),
        ("quarterly_3monthly", quarterly_3monthly_period_func),
        ("yearly", pattern_period_func("%Y")),
    ]
)
91102
92103
93- def prune_split (archives , rule , n , kept_because = None ):
94- last = None
104+ # Earliest time datetime can represent (datetime.min), made timezone-aware as UTC. NOTE(review): no extra day is actually added here to allow for time zone offsets - confirm whether one is needed.
105+ DATETIME_MIN_WITH_ZONE = datetime .min .replace (tzinfo = timezone .utc )
106+
107+
def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
    """Select the archives that the pruning rule *rule* wants to keep.

    Archives are visited newest first. Consecutive archives with the same period key (as computed by
    ``PRUNING_PATTERNS[rule]``) form a group, and only the newest archive of each group is a candidate.
    A candidate already present in *kept_because* is not kept again.

    :param archives: archives to choose from; each needs a ``ts`` (datetime) and an ``id`` attribute.
    :param rule: name of the pruning rule, a key of ``PRUNING_PATTERNS``.
    :param n_or_interval: an ``int`` is the maximum number of archives to keep; anything else is treated as
        a time interval that is subtracted from *base_timestamp* (presumably a ``timedelta``).
    :param base_timestamp: reference point for interval rules; only used when *n_or_interval* is not an int.
    :param kept_because: dict mapping archive id to ``(rule name, count)``; it is updated in place with the
        archives kept by this call. A fresh dict is used when omitted.
    :return: list of the archives kept by this rule, newest first.
    """
    # A mutable default would be shared between calls and leak kept ids from one call into the next.
    if kept_because is None:
        kept_because = {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        n = None
        try:
            earliest_timestamp = base_timestamp - n_or_interval
        except OverflowError:
            # The interval reaches further back than datetime can represent: every archive is within it.
            earliest_timestamp = None

    def can_retain(a, keep):
        """Tell whether the rule still has room (count mode) or the archive is recent enough (interval mode)."""
        if n is not None:
            return len(keep) < n
        return earliest_timestamp is None or a.ts > earliest_timestamp

    keep = []
    if n == 0 or not archives:
        return keep

    last = None
    period_func = PRUNING_PATTERNS[rule]
    sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
    for a in sorted_archives:
        if not can_retain(a, keep):
            break
        period = period_func(a)
        if period != last:
            # first (= newest) archive of a new period group
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))

    # Keep oldest archive if we didn't reach the target retention count
    oldest = sorted_archives[-1]
    if oldest.id not in kept_because and can_retain(oldest, keep):
        keep.append(oldest)
        kept_because[oldest.id] = (rule + "[oldest]", len(keep))

    return keep
117145
118146
119147class PruneMixIn :
120148 @with_repository (compatibility = (Manifest .Operation .DELETE ,))
121149 def do_prune (self , args , repository , manifest ):
122150 """Prune archives according to specified rules."""
123- if not any (
124- (
151+ if all (
152+ # Needs explicit None-check as interval arg may be 0 (Falsey)
153+ e is None for e in (
154+ args .keep ,
155+ args .within ,
156+ args .last ,
125157 args .secondly ,
126158 args .minutely ,
127159 args .hourly ,
@@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest):
131163 args .quarterly_13weekly ,
132164 args .quarterly_3monthly ,
133165 args .yearly ,
134- args .within ,
135166 )
136167 ):
137168 raise CommandError (
138- 'At least one of the "keep-within", "keep-last", '
169+ 'At least one of the "keep", "keep -within", "keep-last", '
139170 '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
140171 '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
141172 'or "keep-yearly" settings must be specified.'
@@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest):
159190 # (<rulename>, <how many archives were kept by this rule so far >)
160191 kept_because = {}
161192
162- # find archives which need to be kept because of the keep-within rule
163- if args .within :
164- keep += prune_within (archives , args .within , kept_because )
165-
193+ base_timestamp = datetime .now ().astimezone ()
166194 # find archives which need to be kept because of the various time period rules
167195 for rule in PRUNING_PATTERNS .keys ():
168- num = getattr (args , rule , None )
169- if num is not None :
170- keep += prune_split (archives , rule , num , kept_because )
196+ num_or_interval = getattr (args , rule , None )
197+ if num_or_interval is not None :
198+ keep += prune_split (archives , rule , num_or_interval , base_timestamp , kept_because )
171199
172200 to_delete = set (archives ) - set (keep )
173201 with Cache (repository , manifest , iec = args .iec ) as cache :
@@ -312,81 +340,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
312340 help = "keep all archives within this time interval" ,
313341 )
314342 subparser .add_argument (
315- "--keep-last" ,
343+ "--keep-last" , dest = "last" , type = int , action = Highlander , help = "number of archives to keep"
344+ )
345+ subparser .add_argument (
346+ "--keep" ,
347+ dest = "keep" ,
348+ type = int_or_interval ,
349+ action = Highlander ,
350+ help = "number or time interval of archives to keep" ,
351+ )
352+ subparser .add_argument (
316353 "--keep-secondly" ,
317354 dest = "secondly" ,
318- type = int ,
319- default = 0 ,
355+ type = int_or_interval ,
320356 action = Highlander ,
321- help = "number of secondly archives to keep" ,
357+ help = "number or time interval of secondly archives to keep" ,
322358 )
323359 subparser .add_argument (
324360 "--keep-minutely" ,
325361 dest = "minutely" ,
326- type = int ,
327- default = 0 ,
362+ type = int_or_interval ,
328363 action = Highlander ,
329- help = "number of minutely archives to keep" ,
364+ help = "number or time interval of minutely archives to keep" ,
330365 )
331366 subparser .add_argument (
332367 "-H" ,
333368 "--keep-hourly" ,
334369 dest = "hourly" ,
335- type = int ,
336- default = 0 ,
370+ type = int_or_interval ,
337371 action = Highlander ,
338- help = "number of hourly archives to keep" ,
372+ help = "number or time interval of hourly archives to keep" ,
339373 )
340374 subparser .add_argument (
341375 "-d" ,
342376 "--keep-daily" ,
343377 dest = "daily" ,
344- type = int ,
345- default = 0 ,
378+ type = int_or_interval ,
346379 action = Highlander ,
347- help = "number of daily archives to keep" ,
380+ help = "number or time interval of daily archives to keep" ,
348381 )
349382 subparser .add_argument (
350383 "-w" ,
351384 "--keep-weekly" ,
352385 dest = "weekly" ,
353- type = int ,
354- default = 0 ,
386+ type = int_or_interval ,
355387 action = Highlander ,
356- help = "number of weekly archives to keep" ,
388+ help = "number or time interval of weekly archives to keep" ,
357389 )
358390 subparser .add_argument (
359391 "-m" ,
360392 "--keep-monthly" ,
361393 dest = "monthly" ,
362- type = int ,
363- default = 0 ,
394+ type = int_or_interval ,
364395 action = Highlander ,
365- help = "number of monthly archives to keep" ,
396+ help = "number or time interval of monthly archives to keep" ,
366397 )
367398 quarterly_group = subparser .add_mutually_exclusive_group ()
368399 quarterly_group .add_argument (
369400 "--keep-13weekly" ,
370401 dest = "quarterly_13weekly" ,
371- type = int ,
372- default = 0 ,
373- help = "number of quarterly archives to keep (13 week strategy)" ,
402+ type = int_or_interval ,
403+ help = "number or time interval of quarterly archives to keep (13 week strategy)" ,
374404 )
375405 quarterly_group .add_argument (
376406 "--keep-3monthly" ,
377407 dest = "quarterly_3monthly" ,
378- type = int ,
379- default = 0 ,
380- help = "number of quarterly archives to keep (3 month strategy)" ,
408+ type = int_or_interval ,
409+ help = "number or time interval of quarterly archives to keep (3 month strategy)" ,
381410 )
382411 subparser .add_argument (
383412 "-y" ,
384413 "--keep-yearly" ,
385414 dest = "yearly" ,
386- type = int ,
387- default = 0 ,
415+ type = int_or_interval ,
388416 action = Highlander ,
389- help = "number of yearly archives to keep" ,
417+ help = "number or time interval of yearly archives to keep" ,
390418 )
391419 define_archive_filters_group (subparser , sort_by = False , first_last = False )
392420 subparser .add_argument (
0 commit comments