Skip to content

Commit 9929575

Browse files
authored
Merge pull request #22 from secondlife/signal/detect-type
Add support for detecting archives by signature
2 parents 64aefb4 + fd88cf7 commit 9929575

10 files changed

+126
-42
lines changed

autobuild/archive_utils.py

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import multiprocessing
2+
import tarfile
3+
import zipfile
4+
5+
class ArchiveType:
    """String identifiers for the archive formats this module can detect."""

    GZ = "gz"    # matched by the ".tar.gz" suffix
    BZ2 = "bz2"  # matched by the ".tar.bz2" suffix
    ZIP = "zip"  # matched by the ".zip" suffix
    ZST = "zst"  # matched by the ".tar.zst" suffix
10+
11+
12+
# Leading "magic" byte sequences used to sniff the archive type of a file.
# Reference: https://www.garykessler.net/library/file_sigs.html
_ARCHIVE_MAGIC_NUMBERS = dict(
    (
        (b"\x1f\x8b\x08", ArchiveType.GZ),
        (b"\x42\x5a\x68", ArchiveType.BZ2),
        (b"\x50\x4b\x03\x04", ArchiveType.ZIP),
        (b"\x28\xb5\x2f\xfd", ArchiveType.ZST),
    )
)

# Length of the longest signature above, i.e. how many bytes a sniffer
# needs to read from the start of a file.
_ARCHIVE_MAGIC_NUMBERS_MAX = max(map(len, _ARCHIVE_MAGIC_NUMBERS))
22+
23+
24+
def _archive_type_from_signature(filename: str):
    """Identify an archive by the magic bytes at the start of the file.

    Reads at most ``_ARCHIVE_MAGIC_NUMBERS_MAX`` bytes from ``filename`` and
    returns the ``ArchiveType`` value of the first entry in
    ``_ARCHIVE_MAGIC_NUMBERS`` whose signature prefixes those bytes, or
    ``None`` when no signature matches.
    """
    with open(filename, "rb") as stream:
        prefix = stream.read(_ARCHIVE_MAGIC_NUMBERS_MAX)

    matches = (
        kind
        for signature, kind in _ARCHIVE_MAGIC_NUMBERS.items()
        if prefix.startswith(signature)
    )
    return next(matches, None)
32+
33+
34+
def _archive_type_from_extension(filename: str):
35+
if filename.endswith(".tar.gz"):
36+
return ArchiveType.GZ
37+
if filename.endswith(".tar.bz2"):
38+
return ArchiveType.BZ2
39+
if filename.endswith(".tar.zst"):
40+
return ArchiveType.ZST
41+
if filename.endswith(".zip"):
42+
return ArchiveType.ZIP
43+
return None
44+
45+
46+
def detect_archive_type(filename: str):
    """Return the ArchiveType of ``filename``, or ``None`` if it is unrecognised.

    The file extension is consulted first; the file's leading bytes are only
    read when the extension does not identify a type.
    """
    return (
        _archive_type_from_extension(filename)
        or _archive_type_from_signature(filename)
    )
52+
53+
54+
def open_archive(filename: str) -> tarfile.TarFile | zipfile.ZipFile:
    """Open ``filename`` for reading with the handler for its detected type.

    Zip archives are opened with ``zipfile.ZipFile`` and zstd archives with
    ``ZstdTarFile``; every other result of ``detect_archive_type`` (including
    ``None``) is handed to ``tarfile.open``.
    """
    kind = detect_archive_type(filename)

    if kind == ArchiveType.ZIP:
        return zipfile.ZipFile(filename, "r")

    if kind == ArchiveType.ZST:
        return ZstdTarFile(filename, "r")

    return tarfile.open(filename, "r")
64+
65+
66+
class ZstdTarFile(tarfile.TarFile):
    """``tarfile.TarFile`` that reads or writes through a ``pyzstd.ZstdFile``.

    ``level`` and ``zstd_dict`` are keyword-only; any remaining keyword
    arguments are forwarded to ``tarfile.TarFile``. The underlying zstd
    stream is kept on ``self.zstd_file`` and closed together with the tar
    file.
    """

    def __init__(self, name, mode='r', *, level=4, zstd_dict=None, **kwargs):
        # Imported here rather than at module level, so importing this module
        # does not itself import pyzstd.
        from pyzstd import CParameter, ZstdFile

        # NOTE(review): 'rb' is treated as a read mode here, but the standard
        # tarfile.TarFile documents only 'r', 'a', 'w' and 'x' -- confirm
        # whether 'rb' can actually reach super().__init__ successfully.
        if mode in ('r', 'rb'):
            # Reading: no compression parameters are supplied.
            options = None
        else:
            options = {
                CParameter.compressionLevel: level,
                CParameter.nbWorkers: multiprocessing.cpu_count(),
                CParameter.checksumFlag: 1,
            }

        self.zstd_file = ZstdFile(
            name,
            mode,
            level_or_option=options,
            zstd_dict=zstd_dict,
        )
        try:
            super().__init__(fileobj=self.zstd_file, mode=mode, **kwargs)
        except BaseException:
            # Do not leak the zstd stream if TarFile setup fails; the
            # original exception is re-raised unchanged.
            self.zstd_file.close()
            raise

    def close(self):
        """Close the tar file, then the underlying zstd stream."""
        try:
            super().close()
        finally:
            # Runs even if TarFile.close() raises.
            self.zstd_file.close()

autobuild/autobuild_tool_install.py

+3-13
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@
1414
import os
1515
import pprint
1616
import sys
17-
import tarfile
1817
import urllib.error
1918
import urllib.parse
2019
import urllib.request
21-
import zipfile
2220

2321
from autobuild import autobuild_base, common, configfile
2422
from autobuild.autobuild_tool_source_environment import get_enriched_environment
2523
from autobuild.hash_algorithms import verify_hash
24+
from autobuild import archive_utils
2625

2726
logger = logging.getLogger('autobuild.install')
2827

@@ -408,7 +407,7 @@ def _install_binary(configured_name, platform, package, config_file, install_dir
408407

409408
def get_metadata_from_package(package_file) -> configfile.MetadataDescription:
410409
try:
411-
with open_archive(package_file) as archive:
410+
with archive_utils.open_archive(package_file) as archive:
412411
f = archive.extractfile(configfile.PACKAGE_METADATA_FILE)
413412
return configfile.MetadataDescription(stream=f)
414413
except (FileNotFoundError, KeyError):
@@ -442,15 +441,6 @@ def _default_metadata_for_package(package_file: str, package = None):
442441
return metadata
443442

444443

445-
def open_archive(filename: str) -> tarfile.TarFile | zipfile.ZipFile:
446-
if filename.endswith(".tar.zst"):
447-
return common.ZstdTarFile(filename, "r")
448-
elif filename.endswith(".zip"):
449-
return zipfile.ZipFile(filename, "r")
450-
else:
451-
return tarfile.open(filename, "r")
452-
453-
454444
class ExtractPackageResults:
455445
files: list[str]
456446
conflicts: list[str]
@@ -468,7 +458,7 @@ def raise_conflicts(self):
468458

469459

470460
def extract_package(package_file: str, install_dir: str, dry_run: bool = False) -> ExtractPackageResults:
471-
with open_archive(package_file) as archive:
461+
with archive_utils.open_archive(package_file) as archive:
472462
results = ExtractPackageResults()
473463
for t in archive:
474464
if t.name == configfile.PACKAGE_METADATA_FILE:

autobuild/autobuild_tool_package.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
import getpass
2424
import glob
25-
import hashlib
2625
import json
2726
import logging
2827
import os
@@ -32,7 +31,7 @@
3231
from collections import UserDict
3332
from zipfile import ZIP_DEFLATED, ZipFile
3433

35-
from autobuild import autobuild_base, common, configfile
34+
from autobuild import autobuild_base, common, configfile, archive_utils
3635
from autobuild.common import AutobuildError
3736

3837
logger = logging.getLogger('autobuild.package')
@@ -306,7 +305,7 @@ def _create_tarfile(tarfilename, format, build_directory, filelist, results: dic
306305
tfile = tarfile.open(tarfilename, 'w:gz')
307306
elif format == 'tzst':
308307
tarfilename = tarfilename + '.tar.zst'
309-
tfile = common.ZstdTarFile(tarfilename, 'w', level=22)
308+
tfile = archive_utils.ZstdTarFile(tarfilename, 'w', level=22)
310309
else:
311310
raise PackageError("unknown tar archive format: %s" % format)
312311

autobuild/common.py

-24
Original file line numberDiff line numberDiff line change
@@ -524,27 +524,3 @@ def has_cmd(name, subcmd: str = "help") -> bool:
524524
except OSError:
525525
return False
526526
return not p.returncode
527-
528-
529-
class ZstdTarFile(tarfile.TarFile):
530-
def __init__(self, name, mode='r', *, level=4, zstd_dict=None, **kwargs):
531-
from pyzstd import CParameter, ZstdFile
532-
zstdoption = None
533-
if mode != 'r' and mode != 'rb':
534-
zstdoption = {CParameter.compressionLevel : level,
535-
CParameter.nbWorkers : multiprocessing.cpu_count(),
536-
CParameter.checksumFlag : 1}
537-
self.zstd_file = ZstdFile(name, mode,
538-
level_or_option=zstdoption,
539-
zstd_dict=zstd_dict)
540-
try:
541-
super().__init__(fileobj=self.zstd_file, mode=mode, **kwargs)
542-
except:
543-
self.zstd_file.close()
544-
raise
545-
546-
def close(self):
547-
try:
548-
super().close()
549-
finally:
550-
self.zstd_file.close()

tests/data/archive.tar.bz2

174 Bytes
Binary file not shown.

tests/data/archive.tar.gz

173 Bytes
Binary file not shown.

tests/data/archive.tar.zst

146 Bytes
Binary file not shown.

tests/data/archive.zip

330 Bytes
Binary file not shown.

tests/test_filetype.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import shutil
2+
from os import path
3+
from pathlib import Path
4+
from tests.basetest import temp_dir
5+
6+
import pytest
7+
from autobuild import archive_utils
8+
9+
10+
# Directory holding the sample archives used by these tests.
_DATA_DIR = Path(__file__).parent.joinpath("data")

# (path to sample archive, expected ArchiveType) pairs shared by the tests below.
_ARCHIVE_TEST_CASES = tuple(
    (path.join(_DATA_DIR, sample_name), archive_type)
    for sample_name, archive_type in (
        ("archive.tar.bz2", archive_utils.ArchiveType.BZ2),
        ("archive.tar.gz", archive_utils.ArchiveType.GZ),
        ("archive.tar.zst", archive_utils.ArchiveType.ZST),
        ("archive.zip", archive_utils.ArchiveType.ZIP),
    )
)
18+
19+
20+
@pytest.mark.parametrize("filename,expected_type", _ARCHIVE_TEST_CASES)
def test_detect_from_extension(filename, expected_type):
    """Each sample archive, under its original name, is detected as the expected type."""
    assert archive_utils.detect_archive_type(filename) == expected_type
24+
25+
26+
@pytest.mark.parametrize("filename,expected_type", _ARCHIVE_TEST_CASES)
def test_detect_from_signature(filename, expected_type):
    """A copy of each sample archive with no extension is still detected correctly."""
    with temp_dir() as scratch:
        # Copy under a name with no suffix so the extension cannot identify it.
        extensionless_copy = str(Path(scratch) / "archive")
        shutil.copyfile(filename, extensionless_copy)
        detected = archive_utils.detect_archive_type(extensionless_copy)
        assert detected == expected_type

tests/test_package.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from zipfile import ZipFile
1010

1111
import autobuild.autobuild_tool_package as package
12-
from autobuild import common, configfile
12+
from autobuild import common, configfile, archive_utils
1313
from tests.basetest import BaseTest, CaptureStdout, ExpectError, clean_dir, clean_file
1414

1515
# ****************************************************************************
@@ -76,7 +76,7 @@ def tearDown(self):
7676

7777
def tar_has_expected(self,tar):
7878
if 'tar.zst' in tar:
79-
tarball = common.ZstdTarFile(tar, 'r')
79+
tarball = archive_utils.ZstdTarFile(tar, 'r')
8080
else:
8181
tarball = tarfile.open(tar, 'r')
8282
packaged_files=tarball.getnames()

0 commit comments

Comments
 (0)