11import argparse
22from collections import OrderedDict
3- from datetime import datetime , timezone , timedelta
3+ from datetime import datetime , timezone
44import logging
55from operator import attrgetter
66import os
7+ import itertools
78
89from ._common import with_repository , Highlander
910from ..archive import Archive
1011from ..cache import Cache
1112from ..constants import * # NOQA
12- from ..helpers import ArchiveFormatter , interval , sig_int , ProgressIndicatorPercent , CommandError , Error
13- from ..helpers import archivename_validator
13+ from ..helpers import interval , int_or_interval , sig_int , archivename_validator
14+ from ..helpers import ArchiveFormatter , ProgressIndicatorPercent , CommandError , Error
1415from ..manifest import Manifest
1516
1617from ..logger import create_logger
1718
1819logger = create_logger ()
1920
2021
21- def prune_within (archives , seconds , kept_because ):
22- target = datetime .now (timezone .utc ) - timedelta (seconds = seconds )
23- kept_counter = 0
24- result = []
25- for a in archives :
26- if a .ts > target :
27- kept_counter += 1
28- kept_because [a .id ] = ("within" , kept_counter )
29- result .append (a )
30- return result
# The *_period_func functions create period grouping keys that group together archives falling within a certain
# period. Among the archives in each of these groups, only the latest (by creation timestamp) is kept.
3124
3225
33- def default_period_func (pattern ):
def unique_period_func():
    """Build a period function that puts every archive into a group of its own.

    The returned callable ignores the archive it is given and hands out the next
    value of a private, ever-increasing counter, so no two calls ever produce the
    same period key.
    """
    sequence = itertools.count()

    def next_unique_key(_archive):
        """Return a key that this function has never returned before (the archive is ignored)."""
        return next(sequence)

    return next_unique_key
34+
35+
def pattern_period_func(pattern):
    """Build a period function keyed on a strftime rendering of the archive timestamp.

    ``pattern`` is a strftime format string. Archives whose creation timestamp,
    converted to local time, formats to the same string share a period group.
    """

    def format_local_timestamp(archive):
        """Return the archive's creation timestamp, in local time, formatted with ``pattern``."""
        local_ts = archive.ts.astimezone()  # group by local wall-clock time, not by the stored zone
        return local_ts.strftime(pattern)

    return format_local_timestamp
3943
4044
4145def quarterly_13weekly_period_func (a ):
46+ """Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
4247 (year , week , _ ) = a .ts .astimezone ().isocalendar () # local time
4348 if week <= 13 :
4449 # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
@@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a):
6065
6166
6267def quarterly_3monthly_period_func (a ):
68+ """Group archives by extracting the 3-month quarter from their creation timestamp"""
6369 lt = a .ts .astimezone () # local time
6470 if lt .month <= 3 :
6571 # 1-1 to 3-31
@@ -77,51 +83,77 @@ def quarterly_3monthly_period_func(a):
7783
# Maps every pruning rule name to the function that computes an archive's period grouping key.
# Insertion order is significant: the rules are evaluated in the order they appear here.
PRUNING_PATTERNS = OrderedDict()

# One group per archive: each archive on its own is a candidate for keeping.
PRUNING_PATTERNS["within"] = unique_period_func()
PRUNING_PATTERNS["last"] = unique_period_func()
PRUNING_PATTERNS["keep"] = unique_period_func()

# Calendar periods: only the latest archive (by creation timestamp) of each period is a candidate.
PRUNING_PATTERNS["secondly"] = pattern_period_func("%Y-%m-%d %H:%M:%S")
PRUNING_PATTERNS["minutely"] = pattern_period_func("%Y-%m-%d %H:%M")
PRUNING_PATTERNS["hourly"] = pattern_period_func("%Y-%m-%d %H")
PRUNING_PATTERNS["daily"] = pattern_period_func("%Y-%m-%d")
PRUNING_PATTERNS["weekly"] = pattern_period_func("%G-%V")
PRUNING_PATTERNS["monthly"] = pattern_period_func("%Y-%m")
PRUNING_PATTERNS["quarterly_13weekly"] = quarterly_13weekly_period_func
PRUNING_PATTERNS["quarterly_3monthly"] = quarterly_3monthly_period_func
PRUNING_PATTERNS["yearly"] = pattern_period_func("%Y")
91102
92103
93- def prune_split (archives , rule , n , kept_because = None ):
94- last = None
# datetime cannot represent instants before datetime.min, and converting an aware datetime.min to a
# time zone with a negative UTC offset would fall below that limit and raise OverflowError.
# Start one day later (UTC offsets are always smaller than 24 hours) so that the value stays
# representable in every time zone.
DATETIME_MIN_WITH_ZONE = datetime.min.replace(day=2, tzinfo=timezone.utc)
106+
107+
def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
    """Select the archives that the pruning rule ``rule`` wants to keep.

    Archives are visited from newest to oldest. They are grouped by the period
    function registered for ``rule`` in ``PRUNING_PATTERNS``; only the newest
    archive of each period is a candidate, and a candidate is skipped when
    another rule has already recorded it in ``kept_because``.

    :param archives: collection of archive objects exposing ``ts`` (creation
        timestamp) and ``id``.
    :param rule: name of the rule, a key of ``PRUNING_PATTERNS``.
    :param n_or_interval: an ``int`` limits how many archives this rule keeps;
        any other value is treated as a time span that is subtracted from
        ``base_timestamp`` (presumably a ``timedelta`` -- confirm against the
        argument parser), and only archives newer than that cutoff are kept.
    :param base_timestamp: reference time for the interval form; unused when
        ``n_or_interval`` is an ``int``.
    :param kept_because: dict mapping archive id to ``(rule name, position)``.
        It is updated in place for every archive kept here. When omitted, a
        fresh dict is used for this call only.
    :return: list of the archives kept by this rule, newest first.
    """
    # A default of {} would be created once and shared by all calls, leaking kept ids between them.
    if kept_because is None:
        kept_because = {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        n, earliest_timestamp = None, base_timestamp - n_or_interval

    def can_retain(a, keep):
        """Tell whether the rule's limit (count or cutoff time) still allows keeping ``a``."""
        if n is not None:
            return len(keep) < n
        return a.ts > earliest_timestamp

    keep = []
    if n == 0 or not archives:
        return keep

    last = None
    period_func = PRUNING_PATTERNS[rule]
    sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
    for a in sorted_archives:
        # Archives are sorted newest first, so once the limit is hit no later archive can qualify.
        if not can_retain(a, keep):
            break
        period = period_func(a)
        if period != last:
            # First (= newest) archive seen for this period.
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))

    # Keep oldest archive if we didn't reach the target retention count
    oldest = sorted_archives[-1]
    if oldest.id not in kept_because and can_retain(oldest, keep):
        keep.append(oldest)
        kept_because[oldest.id] = (rule + "[oldest]", len(keep))

    return keep
117145
118146
119147class PruneMixIn :
120148 @with_repository (compatibility = (Manifest .Operation .DELETE ,))
121149 def do_prune (self , args , repository , manifest ):
122150 """Prune repository archives according to specified rules"""
123- if not any (
124- (
151+ if all (
152+ e is None
153+ for e in (
154+ args .keep ,
155+ args .within ,
156+ args .last ,
125157 args .secondly ,
126158 args .minutely ,
127159 args .hourly ,
@@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest):
131163 args .quarterly_13weekly ,
132164 args .quarterly_3monthly ,
133165 args .yearly ,
134- args .within ,
135166 )
136167 ):
137168 raise CommandError (
138- 'At least one of the "keep-within", "keep-last", '
169+ 'At least one of the "keep", "keep -within", "keep-last", '
139170 '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
140171 '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
141172 'or "keep-yearly" settings must be specified.'
@@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest):
159190 # (<rulename>, <how many archives were kept by this rule so far >)
160191 kept_because = {}
161192
162- # find archives which need to be kept because of the keep-within rule
163- if args .within :
164- keep += prune_within (archives , args .within , kept_because )
165-
193+ base_timestamp = datetime .now ().astimezone ()
166194 # find archives which need to be kept because of the various time period rules
167195 for rule in PRUNING_PATTERNS .keys ():
168- num = getattr (args , rule , None )
169- if num is not None :
170- keep += prune_split (archives , rule , num , kept_because )
196+ num_or_interval = getattr (args , rule , None )
197+ if num_or_interval is not None :
198+ keep += prune_split (archives , rule , num_or_interval , base_timestamp , kept_because )
171199
172200 to_delete = set (archives ) - set (keep )
173201 with Cache (repository , manifest , iec = args .iec ) as cache :
@@ -310,81 +338,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
310338 help = "keep all archives within this time interval" ,
311339 )
312340 subparser .add_argument (
313- "--keep-last" ,
341+ "--keep-last" , dest = "last" , type = int , action = Highlander , help = "number of archives to keep"
342+ )
343+ subparser .add_argument (
344+ "--keep" ,
345+ dest = "keep" ,
346+ type = int_or_interval ,
347+ action = Highlander ,
348+ help = "number or time interval of archives to keep" ,
349+ )
350+ subparser .add_argument (
314351 "--keep-secondly" ,
315352 dest = "secondly" ,
316- type = int ,
317- default = 0 ,
353+ type = int_or_interval ,
318354 action = Highlander ,
319- help = "number of secondly archives to keep" ,
355+ help = "number or time interval of secondly archives to keep" ,
320356 )
321357 subparser .add_argument (
322358 "--keep-minutely" ,
323359 dest = "minutely" ,
324- type = int ,
325- default = 0 ,
360+ type = int_or_interval ,
326361 action = Highlander ,
327- help = "number of minutely archives to keep" ,
362+ help = "number or time interval of minutely archives to keep" ,
328363 )
329364 subparser .add_argument (
330365 "-H" ,
331366 "--keep-hourly" ,
332367 dest = "hourly" ,
333- type = int ,
334- default = 0 ,
368+ type = int_or_interval ,
335369 action = Highlander ,
336- help = "number of hourly archives to keep" ,
370+ help = "number or time interval of hourly archives to keep" ,
337371 )
338372 subparser .add_argument (
339373 "-d" ,
340374 "--keep-daily" ,
341375 dest = "daily" ,
342- type = int ,
343- default = 0 ,
376+ type = int_or_interval ,
344377 action = Highlander ,
345- help = "number of daily archives to keep" ,
378+ help = "number or time interval of daily archives to keep" ,
346379 )
347380 subparser .add_argument (
348381 "-w" ,
349382 "--keep-weekly" ,
350383 dest = "weekly" ,
351- type = int ,
352- default = 0 ,
384+ type = int_or_interval ,
353385 action = Highlander ,
354- help = "number of weekly archives to keep" ,
386+ help = "number or time interval of weekly archives to keep" ,
355387 )
356388 subparser .add_argument (
357389 "-m" ,
358390 "--keep-monthly" ,
359391 dest = "monthly" ,
360- type = int ,
361- default = 0 ,
392+ type = int_or_interval ,
362393 action = Highlander ,
363- help = "number of monthly archives to keep" ,
394+ help = "number or time interval of monthly archives to keep" ,
364395 )
365396 quarterly_group = subparser .add_mutually_exclusive_group ()
366397 quarterly_group .add_argument (
367398 "--keep-13weekly" ,
368399 dest = "quarterly_13weekly" ,
369- type = int ,
370- default = 0 ,
371- help = "number of quarterly archives to keep (13 week strategy)" ,
400+ type = int_or_interval ,
401+ help = "number or time interval of quarterly archives to keep (13 week strategy)" ,
372402 )
373403 quarterly_group .add_argument (
374404 "--keep-3monthly" ,
375405 dest = "quarterly_3monthly" ,
376- type = int ,
377- default = 0 ,
378- help = "number of quarterly archives to keep (3 month strategy)" ,
406+ type = int_or_interval ,
407+ help = "number or time interval of quarterly archives to keep (3 month strategy)" ,
379408 )
380409 subparser .add_argument (
381410 "-y" ,
382411 "--keep-yearly" ,
383412 dest = "yearly" ,
384- type = int ,
385- default = 0 ,
413+ type = int_or_interval ,
386414 action = Highlander ,
387- help = "number of yearly archives to keep" ,
415+ help = "number or time interval of yearly archives to keep" ,
388416 )
389417 define_archive_filters_group (subparser , sort_by = False , first_last = False )
390418 subparser .add_argument (
0 commit comments