Skip to content

Commit a15c05d

Browse files
committed
improved prune report
1 parent e2508dd commit a15c05d

7 files changed

+149
-54
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ src/packages
4242
utils/upload/packages
4343
utils/upload/__pycache__
4444
utils/upload/.generate
45+
utils/upload/.setDateSpread

src/Tetrifact.Core/DateTimeSpanExtensions.cs

+16
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ public static string ToIso(this DateTime date)
2525

2626
return iso
2727
.Substring(0, iso.Length - 3); // remove sec
28+
}
29+
30+
/// <summary>
/// Converts a date to a short ISO-style date string (yyyy-MM-dd), with all time-of-day data removed.
/// The date is converted to local time before formatting.
/// </summary>
/// <param name="date">Date to format.</param>
/// <returns>Local date formatted as yyyy-MM-dd, e.g. "2024-03-05".</returns>
public static string ToIsoShort(this DateTime date)
{
    // Format the date part explicitly instead of trimming a fixed number of characters off the
    // sortable ("s") format; invariant culture keeps the digits and separators culture-independent.
    return date
        .ToLocalTime()
        .ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
}
2945

3046
public static string ToIsoFSFriendly(this DateTime date)

src/Tetrifact.Core/PruneBracket.cs

+21-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
namespace Tetrifact.Core
22
{
33
public class PruneBracket
4-
{
4+
{
5+
#region PROPERTIES
6+
57
/// <summary>
68
/// Number of packages to keep for the given period
79
/// </summary>
@@ -12,16 +14,29 @@ public class PruneBracket
1214
/// </summary>
1315
public int Days { get; set; }
1416

15-
public PruneBracketGrouping Grouping { get; set; }
16-
17+
/// <summary>
18+
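/// How packages in this bracket are grouped (as one group, or per day/week/month/year) when counting kept packages against Amount.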
19+
/// </summary>
20+
public PruneBracketGrouping Grouping { get; set; }
21+
22+
#endregion
23+
24+
#region CTORS
25+
1726
public PruneBracket()
1827
{
1928
Grouping = PruneBracketGrouping.Grouped;
20-
}
29+
}
30+
31+
#endregion
32+
33+
#region METHODS
2134

2235
public override string ToString()
2336
{
24-
return $"{Days} days {Amount} packages";
25-
}
37+
return $"Covers {Days} day(s), allows {Amount} package(s), {Grouping}";
38+
}
39+
40+
#endregion
2641
}
2742
}

src/Tetrifact.Core/PruneBracketProcess.cs

+24-4
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,35 @@
1-
using System;
1+
using Newtonsoft.Json.Bson;
2+
using System;
23
using System.Collections.Generic;
3-
4+
45
namespace Tetrifact.Core
56
{
67
public class PruneBracketProcess : PruneBracket
78
{
8-
public IList<Manifest> Keep {get ; set; } = new List<Manifest>();
9+
public IList<Manifest> Keep { get ; set; } = new List<Manifest>();
910

1011
public IList<Manifest> Prune { get; set; } = new List<Manifest>();
1112

12-
public DateTime Floor { get; set; }
13+
public TimeSpan Coverage { get; set; }
14+
15+
/// <summary>
16+
/// Oldest point in time that this bracket covers (its Ceiling minus Days). Calculated at the start of a given prune run.
17+
/// </summary>
18+
public DateTime Floor { get; set; }
19+
20+
public DateTime Ceiling { get; set; }
21+
22+
/// <summary>
/// Describes the bracket's date window (ceiling and floor, as short dates and as relative ages),
/// followed by the base bracket description.
/// </summary>
/// <returns>Single-line, human-readable description of this bracket.</returns>
public override string ToString()
{
    // values are gathered in the same order in which they appear in the final text
    var ceilingDate = this.Ceiling.ToIsoShort();
    var floorDate = this.Floor.ToIsoShort();
    var ceilingAge = this.Ceiling.Ago();
    var floorAge = this.Floor.Ago();
    var bracketText = base.ToString();

    return $"Ceiling:{ceilingDate} floor:{floorDate} (from {ceilingAge} ago to {floorAge} ago), {bracketText}";
}
26+
27+
/// <summary>
/// Tests whether a date falls strictly between this bracket's Floor and Ceiling.
/// </summary>
/// <param name="date">Date to test.</param>
/// <returns>True if the date is later than Floor and earlier than Ceiling, otherwise false.</returns>
public bool Contains(DateTime date)
{
    // Both bounds are exclusive on purpose: a date exactly on Floor or Ceiling matches nothing.
    // This errs on the side of caution while packages being aggressively deleted after landing in
    // multiple brackets is still being traced. Packages that match no bracket are always kept.
    if (date >= this.Ceiling)
        return false;

    return date > this.Floor;
}
1333

1434
public static PruneBracketProcess FromPruneBracket(PruneBracket pruneBracket)
1535
{

src/Tetrifact.Core/PruneService.cs

+70-38
Original file line numberDiff line numberDiff line change
@@ -92,64 +92,67 @@ public PrunePlan GeneratePrunePlan()
9292
.ToList();
9393

9494
IList<string> taggedKeep = new List<string>();
95-
IList<string> newKeep = new List<string>();
9695
IList<string> report = new List<string>();
9796

98-
int unhandled = 0;
99-
100-
report.Add(" ******************************** Prune audit start **********************************");
97+
int ignoringNoBracketCount = 0;
10198

10299
IList<string> packageIds = _indexReader.GetAllPackageIds().ToList();
103100
packageIds = packageIds.OrderBy(n => Guid.NewGuid()).ToList(); // randomize collection order
104101

105102
DateTime utcNow = _timeprovider.GetUtcNow();
106-
foreach(PruneBracketProcess pruneBracketProcess in processBrackets)
107-
pruneBracketProcess.Floor = utcNow.AddDays(-1 * pruneBracketProcess.Days);
103+
DateTime ceiling = utcNow;
104+
foreach(PruneBracketProcess pruneBracketProcess in processBrackets)
105+
{
106+
pruneBracketProcess.Ceiling = ceiling;
107+
pruneBracketProcess.Floor = ceiling.AddDays(-1 * pruneBracketProcess.Days);
108+
ceiling = pruneBracketProcess.Floor;
109+
}
108110

109111
int startingPackageCount = packageIds.Count;
110112

111-
report.Add($"Server currently contains {packageIds.Count} packages.");
113+
report.Add($"Server currently holds {packageIds.Count} packages.");
112114

113115
foreach (string packageId in packageIds)
114116
{
115117
Manifest manifest = _indexReader.GetManifestHead(packageId);
118+
report.Add(string.Empty);
116119

117120
if (manifest == null)
118121
{
119122
_log.LogWarning($"Expected manifest for package {packageId} was not found, skipping.");
120123
continue;
121-
}
122-
124+
}
125+
123126
string flattenedTags = manifest.Tags.Count == 0 ? string.Empty : $"Tags : {string.Join(",", manifest.Tags)}";
127+
flattenedTags = string.IsNullOrEmpty(flattenedTags) ? "Package is untagged": $"Tagged with : {flattenedTags}";
128+
124129
int ageInDays = (int)Math.Round((utcNow - manifest.CreatedUtc).TotalDays, 0);
125-
report.Add($"Analysing {packageId}, added {manifest.CreatedUtc.ToIso()} ({ageInDays} days ago). Tagged with: {flattenedTags}");
130+
report.Add($"Analysing package \"{packageId}\", added {manifest.CreatedUtc.ToIso()} ({ageInDays} days ago). {flattenedTags}.");
126131

127-
PruneBracketProcess matchingBracket = processBrackets.FirstOrDefault(b => manifest.CreatedUtc < b.Floor);
132+
PruneBracketProcess matchingBracket = processBrackets.FirstOrDefault(bracket => bracket.Contains(manifest.CreatedUtc));
128133
if (matchingBracket == null)
129134
{
130-
report.Add($"{packageId}, created {manifest.CreatedUtc.ToIso()}, does not land in any prune bracket, will be kept.");
131-
unhandled ++;
135+
report.Add($"Package \"{packageId}\" doesn't fit into any prune brackets, will be kept.");
136+
ignoringNoBracketCount ++;
132137
continue;
133138
}
134139

135-
report.Add($"{packageId}, created {manifest.CreatedUtc.ToIso()}, lands in prune bracket {matchingBracket.Days}Days.");
136-
137140
// try to find reasons to keep package
138141

139142
// packages can be tagged to never be deleted. This ignores keep count, but will push out packages that are not tagged
140-
bool isTaggedKeep = manifest.Tags.Any(tag => _settings.PruneIgnoreTags.Any(protectedTag => protectedTag.Equals(tag)));
141-
if (isTaggedKeep)
143+
IEnumerable<string> keepTagsOnPackage = manifest.Tags.Where(tag => _settings.PruneIgnoreTags.Any(protectedTag => protectedTag.Equals(tag)));
144+
if (keepTagsOnPackage.Any())
142145
{
143146
taggedKeep.Add(packageId);
144147
matchingBracket.Keep.Add(manifest);
145-
report.Add($"{packageId} marked for keep based on tag.");
148+
report.Add($"Package \"{packageId}\" marked for keep based on tag(s) {string.Join(",", keepTagsOnPackage)}.");
146149
continue;
147150
}
148151

149152
// "group strategy" - the entire bracket is treated as one big bag
150153
if (matchingBracket.Grouping == PruneBracketGrouping.Grouped && matchingBracket.Keep.Count < matchingBracket.Amount)
151154
{
152-
report.Add($"{packageId} marked for keep, {matchingBracket.Keep.Count} packages kept so far.");
155+
report.Add($"Package \"{packageId}\" marked for keep based on its date grouping (bracket {matchingBracket}) {matchingBracket.Amount - matchingBracket.Keep.Count} slots left in bracket.");
153156
matchingBracket.Keep.Add(manifest);
154157
continue;
155158
}
@@ -158,8 +161,10 @@ public PrunePlan GeneratePrunePlan()
158161
if (matchingBracket.Grouping == PruneBracketGrouping.Daily)
159162
{
160163
int code = manifest.CreatedUtc.ToDayCode();
161-
if (matchingBracket.Keep.Count(m => m.CreatedUtc.ToDayCode() == code) < matchingBracket.Amount)
164+
int kept = matchingBracket.Keep.Count(m => m.CreatedUtc.ToDayCode() == code);
165+
if (kept < matchingBracket.Amount)
162166
{
167+
report.Add($"Package \"{packageId}\" marked for keep, bracket {matchingBracket.Days} had {matchingBracket.Amount - kept} slots left for day {code}.");
163168
matchingBracket.Keep.Add(manifest);
164169
continue;
165170
}
@@ -169,8 +174,10 @@ public PrunePlan GeneratePrunePlan()
169174
if (matchingBracket.Grouping == PruneBracketGrouping.Weekly)
170175
{
171176
int code = manifest.CreatedUtc.ToWeekCode();
172-
if (matchingBracket.Keep.Count(m => m.CreatedUtc.ToWeekCode() == code) < matchingBracket.Amount)
177+
int kept = matchingBracket.Keep.Count(m => m.CreatedUtc.ToWeekCode() == code);
178+
if (kept < matchingBracket.Amount)
173179
{
180+
report.Add($"Package \"{packageId}\" marked for keep, bracket {matchingBracket.Days} had {matchingBracket.Amount - kept} slots left for week {code}.");
174181
matchingBracket.Keep.Add(manifest);
175182
continue;
176183
}
@@ -180,53 +187,78 @@ public PrunePlan GeneratePrunePlan()
180187
if (matchingBracket.Grouping == PruneBracketGrouping.Monthly)
181188
{
182189
int code = manifest.CreatedUtc.ToMonthCode();
183-
if (matchingBracket.Keep.Count(m => m.CreatedUtc.ToMonthCode() == code) < matchingBracket.Amount)
190+
int kept = matchingBracket.Keep.Count(m => m.CreatedUtc.ToMonthCode() == code);
191+
if (kept < matchingBracket.Amount)
184192
{
193+
report.Add($"Package \"{packageId}\" marked for keep, bracket {matchingBracket.Days} had {matchingBracket.Amount - kept} slots left for month {code}.");
185194
matchingBracket.Keep.Add(manifest);
186195
continue;
187196
}
188197
}
189198

190-
// bracket is on x packages per month basis
199+
// bracket is on x packages per year basis
191200
if (matchingBracket.Grouping == PruneBracketGrouping.Yearly)
192201
{
193-
int code = manifest.CreatedUtc.ToMonthCode();
194-
if (matchingBracket.Keep.Count(m => m.CreatedUtc.ToMonthCode() == code) < matchingBracket.Amount)
202+
int code = manifest.CreatedUtc.Year;
203+
int kept = matchingBracket.Keep.Count(m => m.CreatedUtc.Year == code);
204+
if (kept < matchingBracket.Amount)
195205
{
206+
report.Add($"Package \"{packageId}\" marked for keep, bracket {matchingBracket.Days} had {matchingBracket.Amount - kept} slots left for year {code}.");
196207
matchingBracket.Keep.Add(manifest);
197208
continue;
198209
}
199210
}
200211

201-
202212
// no reasons found, prune package
203213
matchingBracket.Prune.Add(manifest);
204-
report.Add($"{packageId} marked for prune, {matchingBracket.Keep.Count} packages already kept.");
214+
report.Add($"Package \"{packageId}\" failed to pass any keep tests, marked for prune.");
215+
205216
} // for each
206217

207218
string pruneIdList = string.Empty;
208219
if (packageIds.Count > 0)
209220
pruneIdList = $" ({string.Join(",", packageIds)})";
210221

211222
report.Add(string.Empty);
212-
report.Add($"Pre-weekly ignore count is {newKeep.Count()} - {string.Join(",", newKeep)}");
213-
report.Add($"Unhandled: {unhandled}");
214-
215-
if (taggedKeep.Count > 0)
216-
report.Add($"Kept due to tagging - {string.Join(",", taggedKeep)}.");
217-
218-
foreach(PruneBracketProcess p in processBrackets)
219-
report.Add($"Bracket {p}, keeping {p.Keep.Count} packages ({string.Join(",",p.Keep)}), pruning {p.Prune.Count} packages ({string.Join(",", p.Prune)})");
220-
221-
report.Add(string.Empty);
222-
report.Add(" ******************************** Prune audit end **********************************");
223+
224+
int totalKeep = 0;
225+
int totalPrune = 0;
226+
227+
foreach (PruneBracketProcess p in processBrackets)
228+
{
229+
totalKeep += p.Keep.Count;
230+
totalPrune += p.Prune.Count;
231+
report.Add($"Bracket {p}.");
232+
report.Add($"Keeping {p.Keep.Count}{FlattenList(p.Keep.Select(p => p.Id))}.");
233+
report.Add($"Pruning {p.Prune.Count}{FlattenList(p.Prune.Select(p => p.Id))}.");
234+
report.Add(string.Empty);
235+
}
236+
237+
if (taggedKeep.Any())
238+
report.Add($"Kept {taggedKeep.Count} packages because of tag matches{FlattenList(taggedKeep)}.");
239+
else
240+
report.Add("No packages were kept due to tag matching. Note that packages need to fall into a bracket first before keep tagging rules are applied.");
241+
242+
report.Add($"Total packages in system:{packageIds.Count}, no bracket match:{ignoringNoBracketCount}, pruning:{totalPrune}, keeping:{totalKeep}.");
243+
244+
int totalHandled = totalKeep + totalPrune + ignoringNoBracketCount;
245+
if (packageIds.Count != totalHandled)
246+
report.Add($"ERROR : Package handling count error, expected {packageIds.Count}, got {totalHandled}.");
223247

224248
return new PrunePlan{
225249
Report = report,
226250
Brackets = processBrackets
227251
};
228252
}
229253

254+
/// <summary>
/// Flattens a collection into a report-friendly suffix of the form " (a,b,c)".
/// </summary>
/// <param name="packages">Items to list; each item is rendered with its ToString().</param>
/// <returns>Leading space plus comma-separated items in brackets, or an empty string if there are no items.</returns>
private static string FlattenList(IEnumerable<object> packages)
{
    if (packages == null)
        return string.Empty;

    // Materialize once: callers pass deferred LINQ queries, which would otherwise be
    // evaluated twice (once for the emptiness check, once for the join).
    List<object> items = packages.ToList();
    if (items.Count == 0)
        return string.Empty;

    return $" ({string.Join(",", items)})";
}
261+
230262
#endregion
231263
}
232264
}

utils/upload/setDateSpread.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,18 @@
33
import glob
44
import json
55
import datetime
6+
import argparse
7+
from importlib.machinery import SourceFileLoader
68

7-
work_dir='./../../src/Tetrifact.Web/bin/Debug/net6.0/data/'
8-
packagesPerDay=5
9+
# Load the helper module from ./vars.py in the current working directory; it provides mergeFromFile.
loader = SourceFileLoader('loader', './vars.py').load_module()

# Command-line arguments, with defaults suited to a local debug build.
argParser = argparse.ArgumentParser()
argParser.add_argument('--work_dir', default='./../../src/Tetrifact.Web/bin/Debug/net6.0/data/')
# type=int: without it a value passed on the command line arrives as a str and range() below fails.
argParser.add_argument('--packages_per_day', type=int, default=5)
# NOTE(review): presumably merges values from a local '.setDateSpread' settings file with the
# parsed arguments - confirm precedence against vars.py.
args = loader.mergeFromFile('.setDateSpread', vars(argParser.parse_args()))

work_dir = args['work_dir']
# Coerce again after the merge in case the merged value is not already an int.
packages_per_day = int(args['packages_per_day'])
918

1019
def loadJson(filepath):
1120
import json
@@ -38,9 +47,12 @@ def writeJson(filepath, dataObject):
3847
date = datetime.datetime.now()
3948

4049
for package in packages:
41-
date = date + datetime.timedelta(days = daysBack)
4250

43-
for day in range(packagesPerDay):
51+
date = datetime.datetime.now() + datetime.timedelta(days = -1*daysBack)
52+
daysBack = daysBack + 1
53+
print(f'Setting package date back to {str(date)}')
54+
55+
for day in range(packages_per_day):
4456
manifestPath = os.path.join(package, 'manifest.json')
4557
manifest = loadJson(manifestPath)
4658
manifest['CreatedUtc'] = str(date)
@@ -53,4 +65,3 @@ def writeJson(filepath, dataObject):
5365

5466
print(f'Updated package {package}')
5567

56-
daysBack += 1

utils/upload/uploadAll.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sys
1010
from pathlib import Path
1111

12-
pause=1 # seconds
12+
pause=0 # seconds
1313
zipPath = './content.zip'
1414
packages = glob.glob(f'./packages/*.zip')
1515
server_address='http://localhost:5000'

0 commit comments

Comments
 (0)