Skip to content

Commit 89ebc6f

Browse files
committed
archiving
- Removed 7zip support. It was always a poor fit: the 7z executable works at the directory level, while Tetrifact handles files. - Hardened the archive generation pipeline so that it no longer depends on memcached progress objects being present.
1 parent 61dee05 commit 89ebc6f

20 files changed

+67
-327
lines changed

docker/Dockerfile

-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ RUN apt-get update \
1111
&& apt-get install apt-transport-https -y \
1212
&& apt-get update \
1313
&& apt-get install aspnetcore-runtime-6.0 -y \
14-
&& apt-get install p7zip-full -y \
1514
## clean up
1615
&& rm packages-microsoft-prod.deb \
1716
&& apt-get remove wget -y \
@@ -28,7 +27,6 @@ USER tetrifact
2827
# set Tetrifact default log level
2928
ENV LOGGING__LOGLEVEL__DEFAULT=Information
3029
ENV LOGGING__LOGLEVEL__Microsoft=Warning
31-
ENV SEVEN_ZIP_BINARY_PATH=/usr/lib/p7zip/7z
3230
ENV ASPNETCORE_URLS=http://*:5000
3331

3432
CMD sh -c 'cd /var/tetrifact && dotnet Tetrifact.Web.dll'

docker/docker-compose.yml

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ services:
99
environment:
1010
# This argument is REQUIRED for Tetrifact to bind properly to the container host port.
1111
ASPNETCORE_URLS : http://*:5000
12-
SEVEN_ZIP_BINARY_PATH: /usr/lib/p7zip/7z
1312
volumes:
1413
- ./data:/var/tetrifact/data/:rw
1514
ports:

docs/settings.md

-9
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,3 @@ A detailed list of settings are :
3434

3535
(TBD)
3636

37-
#### 7zip
38-
39-
Tetrifact supports 7zip as a compression method for improved performance. This requires setting the following properties:
40-
41-
SevenZipBinaryPath : <path to 7za executable>
42-
DownloadArchiveMode : 7Zip
43-
44-
7za is the only part of 7zip that is required.
45-

src/Tetrifact.Core/ArchiveService.cs

+47-224
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ public string GetPackageArchivePath(string packageId)
5757

5858
public string GetPackageArchiveQueuePath(string packageId)
5959
{
60-
return Path.Combine(_settings.ArchiveQueuePath, $"{packageId}.json");
60+
// queue files partitioned by iso date to make string sorting easier.
61+
return Path.Combine(_settings.ArchiveQueuePath, $"{DateTime.UtcNow.ToIsoFSFriendly()}_{packageId}.json");
6162
}
6263

6364
public string GetPackageArchiveTempPath(string packageId)
@@ -142,6 +143,9 @@ public ArchiveProgressInfo GetPackageArchiveStatus(string packageId)
142143

143144
string progressCacheKey = this.GetArchiveProgressKey(packageId);
144145
ArchiveProgressInfo cachedProgress = _cache.Get<ArchiveProgressInfo>(progressCacheKey);
146+
if (cachedProgress == null)
147+
cachedProgress = new ArchiveProgressInfo();
148+
145149
return cachedProgress;
146150
}
147151

@@ -177,113 +181,6 @@ public void PurgeOldArchives()
177181
}
178182
}
179183

180-
private async Task Archive7Zip(string packageId, string archivePathTemp)
181-
{
182-
// create staging directory
183-
string tempDir1 = Path.Join(_settings.TempPath, $"__repack_{packageId}");
184-
string tempDir2 = Path.Join(_settings.TempPath, $"_repack_{packageId}");
185-
186-
const int bufSize = 6024;
187-
188-
Manifest manifest = _indexReader.GetManifest(packageId);
189-
190-
// copy all files to single Directory
191-
if (!Directory.Exists(tempDir2))
192-
{
193-
_log.LogInformation($"Archive generation : gathering files for package {packageId}");
194-
Directory.CreateDirectory(tempDir1);
195-
long cacheUpdateIncrements = manifest.Files.Count / 100;
196-
long counter = 0;
197-
198-
manifest.Files.AsParallel().WithDegreeOfParallelism(_settings.ArchiveCPUThreads).ForAll(delegate (ManifestItem file)
199-
{
200-
string targetPath = Path.Join(tempDir1, file.Path);
201-
List<string> knownDirectories = new List<string>();
202-
if (manifest.IsCompressed)
203-
{
204-
GetFileResponse fileLookup = _indexReader.GetFile(file.Id);
205-
if (fileLookup == null)
206-
throw new Exception($"Failed to find expected package file {file.Id} - repository is likely corrupt");
207-
208-
using (var storageArchive = new ZipArchive(fileLookup.Content))
209-
{
210-
ZipArchiveEntry storageArchiveEntry = storageArchive.Entries[0];
211-
using (var storageArchiveStream = storageArchiveEntry.Open())
212-
using (FileStream writeStream = new FileStream(targetPath, FileMode.Create))
213-
// copy async not used here because cannot get this delegate to block asParallel,
214-
StreamsHelper.Copy(storageArchiveStream, writeStream, bufSize);
215-
}
216-
}
217-
else
218-
{
219-
GetFileResponse fileLookup = _indexReader.GetFile(file.Id);
220-
if (fileLookup == null)
221-
throw new Exception($"Failed to find expected package file {file.Id}- repository is likely corrupt");
222-
223-
string dir = Path.GetDirectoryName(targetPath);
224-
if (!knownDirectories.Contains(dir))
225-
{
226-
Directory.CreateDirectory(dir);
227-
knownDirectories.Add(dir);
228-
}
229-
230-
// is this the fastest way of copying? benchmark
231-
using (Stream fileStream = fileLookup.Content)
232-
using (FileStream writeStream = new FileStream(targetPath, FileMode.Create))
233-
// copy async not used here because cannot get this delegate to block asParallel,
234-
StreamsHelper.Copy(fileStream, writeStream, bufSize);
235-
}
236-
237-
counter++;
238-
239-
if (cacheUpdateIncrements == 0 || counter % cacheUpdateIncrements == 0)
240-
{
241-
_log.LogInformation($"Gathering file {counter}/{manifest.Files.Count}, package \"{packageId}\".");
242-
string progressCacheKey = this.GetArchiveProgressKey(packageId);
243-
ArchiveProgressInfo progress = _cache.Get<ArchiveProgressInfo>(progressCacheKey);
244-
if (progress != null)
245-
{
246-
progress.FileCopyProgress = ((decimal)counter / (decimal)manifest.Files.Count) * 100;
247-
_cache.Set(progressCacheKey, progress);
248-
}
249-
}
250-
});
251-
252-
Directory.Move(tempDir1, tempDir2);
253-
}
254-
255-
_log.LogInformation($"Archive generation : building archive for package {packageId}");
256-
257-
// force delete temp file if it already exists, this can sometimes fail and we want an exception to be thrown to block 7zip being called.
258-
// if 7zip encounted
259-
if (_fileSystem.File.Exists(archivePathTemp))
260-
_fileSystem.File.Delete(archivePathTemp);
261-
262-
DateTime compressStart = DateTime.Now;
263-
264-
// ensure bin path exists
265-
if (!_fileSystem.File.Exists(_settings.ExternaArchivingExecutable))
266-
throw new Exception($"7zip binary not found at specified path \"{_settings.ExternaArchivingExecutable}\".");
267-
268-
_log.LogInformation($"Invoking 7z archive generation for package \"{packageId}\".");
269-
270-
// -aoa swtich forces overwriting of existing zip file should it exist
271-
string command = $"{_settings.ExternaArchivingExecutable} -aoa a -tzip -mx={_settings.ArchiveCPUThreads} -mmt=on {archivePathTemp} {tempDir2}/*";
272-
ShellResult result = Shell.Run(command, false, 3600000); // set timeout to 1 hour
273-
TimeSpan compressTaken = DateTime.Now - compressStart;
274-
275-
if (result.ExitCode == 0)
276-
{
277-
_log.LogInformation($"Archive comression with 7zip complete, took {Math.Round(compressTaken.TotalSeconds, 0)} seconds.");
278-
if (result.StdErr.Any())
279-
_log.LogError($"Archive comression with 7zip succeeded, but with errors. Took {Math.Round(compressTaken.TotalSeconds, 0)} seconds. {string.Join("", result.StdErr)}");
280-
}
281-
else
282-
{
283-
_log.LogError($"Archive comression with 7zip failed, took {Math.Round(compressTaken.TotalSeconds, 0)} seconds. {string.Join("", result.StdErr)}");
284-
}
285-
}
286-
287184
private async Task ArchiveDotNetZip(string packageId, string archivePathTemp)
288185
{
289186
DateTime compressStart = DateTime.Now;
@@ -334,7 +231,7 @@ private async Task ArchiveDotNetZip(string packageId, string archivePathTemp)
334231
}
335232

336233
TimeSpan compressTaken = DateTime.Now - compressStart;
337-
_log.LogInformation($"Archive comression with default dotnet ZipArchive complete, took {Math.Round(compressTaken.TotalSeconds, 0)} seconds.");
234+
_log.LogInformation($"Archive compression with default dotnet ZipArchive complete, took {Math.Round(compressTaken.TotalSeconds, 0)} seconds.");
338235
}
339236

340237
public async Task CreateNextQueuedArchive()
@@ -343,59 +240,52 @@ public async Task CreateNextQueuedArchive()
343240
string progressCacheKey = null;
344241
ArchiveProgressInfo progress = null;
345242

346-
foreach (string queuedFile in _fileSystem.Directory.GetFiles(_settings.ArchiveQueuePath))
347-
{
348-
_log.LogInformation($"Processing archive generation for \"{queuedFile}\".");
349-
string queueFileContent = string.Empty;
350-
try
351-
{
352-
queueFileContent = _fileSystem.File.ReadAllText(queuedFile);
353-
archiveQueueInfo = JsonConvert.DeserializeObject<ArchiveQueueInfo>(queueFileContent);
354-
}
355-
catch (Exception ex)
356-
{
357-
_log.LogError($"Corrupt queue file {queuedFile}, content is \n\n{queueFileContent}\n\n. Error is: {ex}. Force deleting queued file.");
358-
try
359-
{
360-
_fileSystem.File.Delete(queuedFile);
361-
}
362-
catch (Exception ex2)
363-
{
364-
_log.LogError($"Failed to delete corrupt queue file {queuedFile}. Error is: {ex2}.");
365-
}
366-
continue;
367-
}
368-
369-
progressCacheKey = this.GetArchiveProgressKey(archiveQueueInfo.PackageId);
370-
progress = _cache.Get<ArchiveProgressInfo>(progressCacheKey);
371-
if (progress == null)
372-
{
373-
_log.LogError($"Progress object not found for archive generation package {archiveQueueInfo.PackageId}, this should not happen.");
374-
continue;
375-
}
376-
377-
if (progress.State == PackageArchiveCreationStates.Queued)
378-
break;
379-
else
380-
{
381-
// force null, this var is used as flag to determine if we have anything to process
382-
progress = null;
383-
continue;
384-
}
385-
}
386-
387-
// nothing queued, exit normally
388-
if (progress == null)
389-
return;
390-
243+
string queuedFile = _fileSystem.Directory.GetFiles(_settings.ArchiveQueuePath).OrderByDescending(f => f).FirstOrDefault();
244+
if (queuedFile == null)
245+
return;
246+
247+
_log.LogInformation($"Processing archive generation for \"{queuedFile}\".");
248+
string queueFileContent = string.Empty;
249+
250+
try
251+
{
252+
queueFileContent = _fileSystem.File.ReadAllText(queuedFile);
253+
archiveQueueInfo = JsonConvert.DeserializeObject<ArchiveQueueInfo>(queueFileContent);
254+
}
255+
catch (Exception ex)
256+
{
257+
_log.LogError($"Corrupt queue file {queuedFile}, content is \n\n{queueFileContent}\n\n. Error is: {ex}. Force deleting queued file.");
258+
try
259+
{
260+
_fileSystem.File.Delete(queuedFile);
261+
}
262+
catch (Exception ex2)
263+
{
264+
_log.LogError($"Failed to delete corrupt queue file {queuedFile}. Error is: {ex2}.");
265+
}
266+
return;
267+
}
268+
269+
progressCacheKey = this.GetArchiveProgressKey(archiveQueueInfo.PackageId);
270+
progress = _cache.Get<ArchiveProgressInfo>(progressCacheKey);
271+
if (progress == null)
272+
progress = new ArchiveProgressInfo
273+
{
274+
PackageId = archiveQueueInfo.PackageId,
275+
QueuedUtc = archiveQueueInfo.QueuedUtc
276+
};
277+
391278
progress.State = PackageArchiveCreationStates.ArchiveGenerating;
392279
progress.StartedUtc = DateTime.UtcNow;
393280
_cache.Set(progressCacheKey, progress);
394281

395282
await this.CreateArchive(archiveQueueInfo.PackageId);
396283

397284
progress.State = PackageArchiveCreationStates.Processed_CleanupRequired;
398-
_cache.Set(progressCacheKey, progress);
285+
_cache.Set(progressCacheKey, progress);
286+
287+
// finally, cleanup queue file
288+
_fileSystem.File.Delete(queuedFile);
399289
}
400290

401291
public async Task CreateArchive(string packageId)
@@ -424,10 +314,7 @@ public async Task CreateArchive(string packageId)
424314

425315
try
426316
{
427-
if (_settings.ArchivingMode == ArchivingModes.SevenZip)
428-
await Archive7Zip(packageId, archivePathTemp);
429-
else
430-
await ArchiveDotNetZip(packageId, archivePathTemp);
317+
await ArchiveDotNetZip(packageId, archivePathTemp);
431318

432319
// flip temp file to final path, it is ready for use only when this happens
433320
_fileSystem.File.Move(archivePathTemp, archivePath);
@@ -436,78 +323,14 @@ public async Task CreateArchive(string packageId)
436323
}
437324
catch(Exception ex)
438325
{
439-
_log.LogError($"Package archive for {packageId} failed unexpectedly with {ex}");
326+
_log.LogError($"Package archive for {packageId} failed unexpectedly with {ex}.");
440327
}
441328
finally
442329
{
443330
_lock.Unlock(archivePathTemp);
444331
}
445332
}
446333

447-
public void CleanupNextQueuedArchive()
448-
{
449-
ArchiveQueueInfo archiveQueueInfo = null;
450-
string progressKey = null;
451-
ArchiveProgressInfo progress = null;
452-
string queueFile = null;
453-
454-
foreach (string queuedFile in _fileSystem.Directory.GetFiles(_settings.ArchiveQueuePath))
455-
{
456-
queueFile = queuedFile;
457-
458-
string queueFileContent = string.Empty;
459-
try
460-
{
461-
queueFileContent = _fileSystem.File.ReadAllText(queuedFile);
462-
archiveQueueInfo = JsonConvert.DeserializeObject<ArchiveQueueInfo>(queueFileContent);
463-
}
464-
catch (Exception ex)
465-
{
466-
_log.LogError($"Corrupt queue file {queuedFile}, content is \n\n{queueFileContent}\n\n. Error is: {ex}. Force deleting queued file.");
467-
try
468-
{
469-
_fileSystem.File.Delete(queuedFile);
470-
}
471-
catch (Exception ex2)
472-
{
473-
_log.LogError($"Failed to delete corrupt queue file {queuedFile}. Error is: {ex2}.");
474-
}
475-
continue;
476-
}
477-
478-
progressKey = this.GetArchiveProgressKey(archiveQueueInfo.PackageId);
479-
progress = _cache.Get<ArchiveProgressInfo>(progressKey);
480-
if (progress == null)
481-
{
482-
_log.LogError($"Progress object not found for archive generation package {archiveQueueInfo.PackageId}, this should not happen.");
483-
continue;
484-
}
485-
486-
if (progress.State == PackageArchiveCreationStates.Processed_CleanupRequired)
487-
break;
488-
else
489-
{
490-
// force null, this var is used as flag to determine if we have anything to process
491-
progress = null;
492-
continue;
493-
}
494-
}
495-
496-
// nothing queued, exit normally
497-
if (progress == null)
498-
return;
499-
500-
// cleanup
501-
string tempDir2 = Path.Join(_settings.TempPath, $"_repack_{archiveQueueInfo.PackageId}");
502-
if (_fileSystem.Directory.Exists(tempDir2))
503-
_fileSystem.Directory.Delete(tempDir2, true);
504-
505-
if (_fileSystem.File.Exists(queueFile))
506-
_fileSystem.File.Delete(queueFile);
507-
508-
_cache.Remove(progressKey);
509-
}
510-
511334
#endregion
512335
}
513336
}

src/Tetrifact.Core/ArchivingModes.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
{
33
public enum ArchivingModes
44
{
5-
Default, // internal dotnet zip compressio
6-
SevenZip // seven zip. requires external 7zip binary. 7zip support is still experimental.
5+
Default // internal dotnet zip compressio
76
}
87
}

0 commit comments

Comments
 (0)