From 278ea0269dfc8d3d76eee13b7bf0eec3a704f7d8 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Wed, 6 Sep 2023 15:35:14 -0400 Subject: [PATCH 01/23] Add support for zstd compression of binary packages This change adds zstd support to conan in the following ways: 1. The person or build running `conan upload` can set a config value core.upload:compression_format = zstd to upload binaries using zstd instead of gzip. 2. The zstd compression is done entirely in Python using a combination of tarfile and python-zstandard. Then the file is uploaded as normal. 3. When downloading packages, if a .tar.zst file is encountered, the extraction code uses tarfile and python-zstandard to extract. I chose python-zstandard as the library because that is what urllib3 uses. --- conans/client/cmd/uploader.py | 106 ++++++++++++++++++++------- conans/client/remote_manager.py | 30 ++++++-- conans/client/rest/rest_client_v2.py | 10 ++- conans/model/conf.py | 5 +- conans/model/manifest.py | 4 +- conans/paths/__init__.py | 1 + 6 files changed, 117 insertions(+), 39 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index 1b367f8d28d..044fec93ed9 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -1,5 +1,6 @@ import os import shutil +import tarfile import time from conan.internal.conan_app import ConanApp @@ -7,7 +8,7 @@ from conans.client.source import retrieve_exports_sources from conans.errors import ConanException, NotFoundException from conans.paths import (CONAN_MANIFEST, CONANFILE, EXPORT_SOURCES_TGZ_NAME, - EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, CONANINFO) + EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, CONANINFO) from conans.util.files import (clean_dirty, is_dirty, gather_files, gzopen_without_timestamps, set_dirty_context_manager, mkdir) @@ -165,11 +166,22 @@ def _prepare_package(self, pref, prev_bundle): def _compress_package_files(self, layout, pref): download_pkg_folder = layout.download_package() - 
package_tgz = os.path.join(download_pkg_folder, PACKAGE_TGZ_NAME) - if is_dirty(package_tgz): - self._output.warning("%s: Removing %s, marked as dirty" % (str(pref), PACKAGE_TGZ_NAME)) - os.remove(package_tgz) - clean_dirty(package_tgz) + compression_format = self._app.cache.new_config.get("core.upload:compression_format") or "gzip" + if compression_format == "gzip": + compress_level_config = "core.gzip:compresslevel" + package_file_name = PACKAGE_TGZ_NAME + package_file = os.path.join(download_pkg_folder, PACKAGE_TGZ_NAME) + elif compression_format == "zstd": + compress_level_config = "core.zstd:compresslevel" + package_file_name = PACKAGE_TZSTD_NAME + package_file = os.path.join(download_pkg_folder, PACKAGE_TZSTD_NAME) + else: + raise ConanException(f"Unsupported compression level '{compression_format}'") + + if is_dirty(package_file): + self._output.warning("%s: Removing %s, marked as dirty" % (str(pref), package_file_name)) + os.remove(package_file) + clean_dirty(package_file) # Get all the files in that directory # existing package, will use short paths if defined @@ -190,17 +202,21 @@ def _compress_package_files(self, layout, pref): files.pop(CONANINFO) files.pop(CONAN_MANIFEST) - if not os.path.isfile(package_tgz): + if os.path.isfile(package_file): + self._output.info(f"Not writing '{package_file}' because it already exists.") + else: if self._output and not self._output.is_terminal: self._output.info("Compressing package...") - tgz_files = {f: path for f, path in files.items()} - compresslevel = self._app.cache.new_config.get("core.gzip:compresslevel", check_type=int) - tgz_path = compress_files(tgz_files, PACKAGE_TGZ_NAME, download_pkg_folder, - compresslevel=compresslevel) - assert tgz_path == package_tgz - assert os.path.exists(package_tgz) - - return {PACKAGE_TGZ_NAME: package_tgz, + + source_files = {f: path for f, path in files.items()} + compresslevel = self._app.cache.new_config.get(compress_level_config, check_type=int) + compressed_path = 
compress_files(source_files, package_file_name, download_pkg_folder, + compresslevel=compresslevel, compressformat=compression_format) + + assert compressed_path == package_file + assert os.path.exists(package_file) + + return {package_file_name: package_file, CONANINFO: os.path.join(download_pkg_folder, CONANINFO), CONAN_MANIFEST: os.path.join(download_pkg_folder, CONAN_MANIFEST)} @@ -246,21 +262,55 @@ def upload_package(self, pref, prev_bundle, remote): self._output.debug(f"Upload {pref} in {duration} time") -def compress_files(files, name, dest_dir, compresslevel=None, ref=None): +def compress_files(files, name, dest_dir, compresslevel=None, ref=None, compressformat=None): t1 = time.time() - # FIXME, better write to disk sequentially and not keep tgz contents in memory - tgz_path = os.path.join(dest_dir, name) - if name in (PACKAGE_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME) and len(files) > 100: - ref_name = f"{ref}:" or "" + tar_path = os.path.join(dest_dir, name) + if name in (PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, EXPORT_SOURCES_TGZ_NAME) and len(files) > 100: + ref_name = f"{ref}:" if ref else "" ConanOutput().info(f"Compressing {ref_name}{name}") - with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle: - tgz = gzopen_without_timestamps(name, mode="w", fileobj=tgz_handle, - compresslevel=compresslevel) - for filename, abs_path in sorted(files.items()): - # recursive is False in case it is a symlink to a folder - tgz.add(abs_path, filename, recursive=False) - tgz.close() + + if compressformat == "zstd": + try: + import zstandard + except ModuleNotFoundError as e: + raise ConanException("zstd compression requires python-zstandard. " + "Run `pip install python-zstandard` and retry. 
" + f"Exception details: {e}") + + with open(tar_path, "wb") as tarfile_obj: + def reset_tarinfo(tarinfo): + """ + Resets mtime in the tarinfo for consistency with + gzopen_without_timestamps() + """ + tarinfo.mtime = 0 + return tarinfo + + # Only provide level if it was overridden by config. + zstd_kwargs = {} + if compresslevel is not None: + zstd_kwargs["level"] = compresslevel + + dctx = zstandard.ZstdCompressor(**zstd_kwargs) + + # Create a zstd stream writer so tarfile writes uncompressed data to + # the zstd stream writer, which in turn writes compressed data to the + # output tar.zst file. + with dctx.stream_writer(tarfile_obj) as stream_writer: + with tarfile.open(mode="w|", fileobj=stream_writer, + format=tarfile.PAX_FORMAT) as tar: + for filename, abs_path in sorted(files.items()): + tar.add(abs_path, filename, recursive=False, filter=reset_tarinfo) + else: + # FIXME, better write to disk sequentially and not keep tgz contents in memory + with set_dirty_context_manager(tar_path), open(tar_path, "wb") as tgz_handle: + tgz = gzopen_without_timestamps(name, mode="w", fileobj=tgz_handle, + compresslevel=compresslevel) + for filename, abs_path in sorted(files.items()): + # recursive is False in case it is a symlink to a folder + tgz.add(abs_path, filename, recursive=False) + tgz.close() duration = time.time() - t1 ConanOutput().debug(f"{name} compressed in {duration} time") - return tgz_path + return tar_path diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 54287df4f10..83b9f5566c8 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -1,5 +1,6 @@ import os import shutil +import tarfile from typing import List from requests.exceptions import ConnectionError @@ -14,7 +15,7 @@ from conans.model.package_ref import PkgReference from conans.model.recipe_ref import RecipeReference from conans.util.files import rmdir, human_size -from conans.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, 
PACKAGE_TGZ_NAME +from conans.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME from conans.util.files import mkdir, tar_extract @@ -148,14 +149,18 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): metadata, only_metadata=False) zipped_files = {k: v for k, v in zipped_files.items() if not k.startswith(METADATA)} # quick server package integrity check: - for f in ("conaninfo.txt", "conanmanifest.txt", "conan_package.tgz"): + for f in ("conaninfo.txt", "conanmanifest.txt"): if f not in zipped_files: raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {f}") + accepted_package_files = [PACKAGE_TZSTD_NAME, PACKAGE_TGZ_NAME] + package_file = next((f for f in zipped_files if f in accepted_package_files), None) + if not package_file: + raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {accepted_package_files} found") self._signer.verify(pref, download_pkg_folder, zipped_files) - tgz_file = zipped_files.pop(PACKAGE_TGZ_NAME, None) + package_file = zipped_files.pop(package_file, None) package_folder = layout.package() - uncompress_file(tgz_file, package_folder, scope=str(pref.ref)) + uncompress_file(package_file, package_folder, scope=str(pref.ref)) mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing for file_name, file_path in zipped_files.items(): # copy CONANINFO and CONANMANIFEST shutil.move(file_path, os.path.join(package_folder, file_name)) @@ -255,8 +260,23 @@ def uncompress_file(src_path, dest_folder, scope=None): if big_file: hs = human_size(filesize) ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}") + with open(src_path, mode='rb') as file_handler: - tar_extract(file_handler, dest_folder) + if src_path.endswith(".tar.zst"): + # Decompress using python-zstandard and tarfile. 
+ try: + import zstandard + except ModuleNotFoundError as e: + raise ConanException("zstd decompression requires python-zstandard. " + "Run `pip install python-zstandard` and retry. " + f"Exception details: {e}") + + dctx = zstandard.ZstdDecompressor() + stream_reader = dctx.stream_reader(file_handler) + with tarfile.open(fileobj=stream_reader, mode='r|') as the_tar: + the_tar.extractall(dest_folder) + else: + tar_extract(file_handler, dest_folder) except Exception as e: error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\ % (src_path, dest_folder, str(e)) diff --git a/conans/client/rest/rest_client_v2.py b/conans/client/rest/rest_client_v2.py index 5922d0a8ae7..bd1136e7d37 100644 --- a/conans/client/rest/rest_client_v2.py +++ b/conans/client/rest/rest_client_v2.py @@ -11,7 +11,7 @@ from conans.errors import ConanException, NotFoundException, PackageNotFoundException, \ RecipeNotFoundException, AuthenticationException, ForbiddenException from conans.model.package_ref import PkgReference -from conans.paths import EXPORT_SOURCES_TGZ_NAME +from conans.paths import EXPORT_SOURCES_TGZ_NAME, PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME from conans.util.dates import from_iso8601_to_timestamp from conans.util.thread import ExceptionThread @@ -81,8 +81,12 @@ def get_package(self, pref, dest_folder, metadata, only_metadata): result = {} # Download only known files, but not metadata (except sign) if not only_metadata: # Retrieve package first, then metadata - accepted_files = ["conaninfo.txt", "conan_package.tgz", "conanmanifest.txt", - "metadata/sign"] + accepted_package_files = [PACKAGE_TZSTD_NAME, PACKAGE_TGZ_NAME] + accepted_files = ["conaninfo.txt", "conanmanifest.txt", "metadata/sign"] + for f in accepted_package_files: + if f in server_files: + accepted_files = [f] + accepted_files + break files = [f for f in server_files if any(f.startswith(m) for m in accepted_files)] # If we didn't indicated reference, server got the latest, use absolute now, it's safer 
urls = {fn: self.router.package_file(pref, fn) for fn in files} diff --git a/conans/model/conf.py b/conans/model/conf.py index 1ec17b4170f..cf510bbf360 100644 --- a/conans/model/conf.py +++ b/conans/model/conf.py @@ -42,8 +42,11 @@ "core.net.http:cacert_path": "Path containing a custom Cacert file", "core.net.http:client_cert": "Path or tuple of files containing a client cert (and key)", "core.net.http:clean_system_proxy": "If defined, the proxies system env-vars will be discarded", - # Gzip compression + # Compression for `conan upload` + "core.upload:compression_format": "The compression format used when uploading Conan packages. " + "Possible values: 'zstd', 'gzip' (default=gzip)", "core.gzip:compresslevel": "The Gzip compresion level for Conan artifacts (default=9)", + "core.zstd:compresslevel": "The zstd compression level for Conan artifacts", # Tools "tools.android:ndk_path": "Argument for the CMAKE_ANDROID_NDK", "tools.android:cmake_legacy_toolchain": "Define to explicitly pass ANDROID_USE_LEGACY_TOOLCHAIN_FILE in CMake toolchain", diff --git a/conans/model/manifest.py b/conans/model/manifest.py index c3c10737f02..ae6e1a9a0a9 100644 --- a/conans/model/manifest.py +++ b/conans/model/manifest.py @@ -1,7 +1,7 @@ import os from collections import defaultdict -from conans.paths import CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME +from conans.paths import CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME from conans.util.dates import timestamp_now, timestamp_to_str from conans.util.files import load, md5, md5sum, save, gather_files @@ -91,7 +91,7 @@ def create(cls, folder, exports_sources_folder=None): """ files, _ = gather_files(folder) # The folders symlinks are discarded for the manifest - for f in (PACKAGE_TGZ_NAME, EXPORT_TGZ_NAME, CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME): + for f in (PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, EXPORT_TGZ_NAME, CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME): 
files.pop(f, None) file_dict = {} diff --git a/conans/paths/__init__.py b/conans/paths/__init__.py index 5992053360d..b5f6fc130b1 100644 --- a/conans/paths/__init__.py +++ b/conans/paths/__init__.py @@ -58,6 +58,7 @@ def _user_home_from_conanrc_file(): CONAN_MANIFEST = "conanmanifest.txt" CONANINFO = "conaninfo.txt" PACKAGE_TGZ_NAME = "conan_package.tgz" +PACKAGE_TZSTD_NAME = "conan_package.tar.zst" EXPORT_TGZ_NAME = "conan_export.tgz" EXPORT_SOURCES_TGZ_NAME = "conan_sources.tgz" DEFAULT_PROFILE_NAME = "default" From 682b0e3a0bab425242913ca448dc8fb096c1742c Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Fri, 8 Sep 2023 08:10:18 -0400 Subject: [PATCH 02/23] Switch to include python-zstandard in the package requirements Because zstd decompression is expected to just work if the server has a .tar.zst file, I am including zstandard in requirements.txt. https://python-zstandard.readthedocs.io/en/latest/projectinfo.html#state-of-project recommends that we "Pin the package version to prevent unwanted breakage when this change occurs!", although I doubt that much will change before an eventual 1.0. --- conans/client/cmd/uploader.py | 8 +------- conans/client/remote_manager.py | 13 +++++-------- conans/requirements.txt | 1 + 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index 044fec93ed9..e2b6de7a8fd 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -2,6 +2,7 @@ import shutil import tarfile import time +import zstandard from conan.internal.conan_app import ConanApp from conan.api.output import ConanOutput @@ -270,13 +271,6 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, compress ConanOutput().info(f"Compressing {ref_name}{name}") if compressformat == "zstd": - try: - import zstandard - except ModuleNotFoundError as e: - raise ConanException("zstd compression requires python-zstandard. " - "Run `pip install python-zstandard` and retry. 
" - f"Exception details: {e}") - with open(tar_path, "wb") as tarfile_obj: def reset_tarinfo(tarinfo): """ diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 83b9f5566c8..741ae408b0e 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -1,6 +1,8 @@ import os import shutil import tarfile +import time +import zstandard from typing import List from requests.exceptions import ConnectionError @@ -160,7 +162,10 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): package_file = zipped_files.pop(package_file, None) package_folder = layout.package() + t1 = time.time() uncompress_file(package_file, package_folder, scope=str(pref.ref)) + duration = time.time() - t1 + scoped_output.debug(f"Decompressed {package_file} in {duration} seconds") mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing for file_name, file_path in zipped_files.items(): # copy CONANINFO and CONANMANIFEST shutil.move(file_path, os.path.join(package_folder, file_name)) @@ -263,14 +268,6 @@ def uncompress_file(src_path, dest_folder, scope=None): with open(src_path, mode='rb') as file_handler: if src_path.endswith(".tar.zst"): - # Decompress using python-zstandard and tarfile. - try: - import zstandard - except ModuleNotFoundError as e: - raise ConanException("zstd decompression requires python-zstandard. " - "Run `pip install python-zstandard` and retry. 
" - f"Exception details: {e}") - dctx = zstandard.ZstdDecompressor() stream_reader = dctx.stream_reader(file_handler) with tarfile.open(fileobj=stream_reader, mode='r|') as the_tar: diff --git a/conans/requirements.txt b/conans/requirements.txt index 4555809e47e..25f8e815d78 100644 --- a/conans/requirements.txt +++ b/conans/requirements.txt @@ -7,3 +7,4 @@ fasteners>=0.15 distro>=1.4.0, <=1.8.0; sys_platform == 'linux' or sys_platform == 'linux2' Jinja2>=3.0, <4.0.0 python-dateutil>=2.8.0, <3 +zstandard==0.21.0 From 396815c31c8b82a3606a5e0d525135b1a90c24bc Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Fri, 8 Sep 2023 13:59:09 -0400 Subject: [PATCH 03/23] Add a test case to cover zstd compress and decompress --- .../unittests/client/remote_manager_test.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/conans/test/unittests/client/remote_manager_test.py b/conans/test/unittests/client/remote_manager_test.py index a10f3073797..e5bd96ab362 100644 --- a/conans/test/unittests/client/remote_manager_test.py +++ b/conans/test/unittests/client/remote_manager_test.py @@ -2,14 +2,15 @@ import unittest from conans.client.cmd.uploader import compress_files -from conans.paths import PACKAGE_TGZ_NAME +from conans.client.remote_manager import uncompress_file +from conans.paths import PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME from conans.test.utils.test_files import temp_folder from conans.util.files import save class RemoteManagerTest(unittest.TestCase): - def test_compress_files(self): + def test_compress_files_tgz(self): folder = temp_folder() save(os.path.join(folder, "one_file.txt"), "The contents") save(os.path.join(folder, "Two_file.txt"), "Two contents") @@ -23,3 +24,30 @@ def test_compress_files(self): self.assertTrue(os.path.exists(path)) expected_path = os.path.join(folder, PACKAGE_TGZ_NAME) self.assertEqual(path, expected_path) + + def test_compress_and_uncompress_zst_files(self): + folder = temp_folder() + save(os.path.join(folder, 
"one_file.txt"), "The contents") + save(os.path.join(folder, "Two_file.txt"), "Two contents") + + files = { + "one_file.txt": os.path.join(folder, "one_file.txt"), + "Two_file.txt": os.path.join(folder, "Two_file.txt"), + } + + path = compress_files(files, PACKAGE_TZSTD_NAME, dest_dir=folder, compressformat='zstd') + self.assertTrue(os.path.exists(path)) + expected_path = os.path.join(folder, PACKAGE_TZSTD_NAME) + self.assertEqual(path, expected_path) + + extract_dir = os.path.join(folder, 'extracted') + uncompress_file(path, extract_dir) + + extract_files = list(sorted(os.listdir(extract_dir))) + expected_files = sorted(files.keys()) + self.assertEqual(extract_files, expected_files) + + for name, path in sorted(files.items()): + extract_path = os.path.join(extract_dir, name) + with open(path, 'r') as f1, open(extract_path, 'r') as f2: + self.assertEqual(f1.read(), f2.read()) From f0b7813985a8957a105c113197ee0ca11a571e14 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Fri, 8 Sep 2023 14:27:04 -0400 Subject: [PATCH 04/23] Downgrade to 0.20.0 to fix CI CI is unable to find 0.21.0 --- conans/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conans/requirements.txt b/conans/requirements.txt index 25f8e815d78..3cefc29b839 100644 --- a/conans/requirements.txt +++ b/conans/requirements.txt @@ -7,4 +7,4 @@ fasteners>=0.15 distro>=1.4.0, <=1.8.0; sys_platform == 'linux' or sys_platform == 'linux2' Jinja2>=3.0, <4.0.0 python-dateutil>=2.8.0, <3 -zstandard==0.21.0 +zstandard==0.20.0 From a33394daa263e19fa0929ed73d95eca90bdc8e87 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 11 Sep 2023 16:29:52 -0400 Subject: [PATCH 05/23] Two small improvements 1. Change requirements.txt to allow either zstandard 0.20 or 0.21. That prevents a downgrade for people who already have 0.21 installed, while also allowing CI to find 0.20. 2. Move compressformat parameter earlier in compress_files() function. 
It made a bit more sense to have it earlier; as long as consumers are correctly using positional kwargs, it shouldn't break anyone. --- conans/client/cmd/uploader.py | 2 +- conans/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index e2b6de7a8fd..c94f46bbd61 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -263,7 +263,7 @@ def upload_package(self, pref, prev_bundle, remote): self._output.debug(f"Upload {pref} in {duration} time") -def compress_files(files, name, dest_dir, compresslevel=None, ref=None, compressformat=None): +def compress_files(files, name, dest_dir, compressformat=None, compresslevel=None, ref=None): t1 = time.time() tar_path = os.path.join(dest_dir, name) if name in (PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, EXPORT_SOURCES_TGZ_NAME) and len(files) > 100: diff --git a/conans/requirements.txt b/conans/requirements.txt index 3cefc29b839..f509cc9eea4 100644 --- a/conans/requirements.txt +++ b/conans/requirements.txt @@ -7,4 +7,4 @@ fasteners>=0.15 distro>=1.4.0, <=1.8.0; sys_platform == 'linux' or sys_platform == 'linux2' Jinja2>=3.0, <4.0.0 python-dateutil>=2.8.0, <3 -zstandard==0.20.0 +zstandard>=0.20, <= 0.21 From bbed1a0c9ba268a86d1a867519ff8598d8180e83 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 22 Jul 2024 10:46:51 -0400 Subject: [PATCH 06/23] Address review feedback --- conans/client/cmd/uploader.py | 28 +++++++++++++--------------- conans/client/remote_manager.py | 10 ++-------- conans/util/files.py | 20 +++++++++++++------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index c94f46bbd61..aa857fdd333 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -167,7 +167,8 @@ def _prepare_package(self, pref, prev_bundle): def _compress_package_files(self, layout, pref): download_pkg_folder = 
layout.download_package() - compression_format = self._app.cache.new_config.get("core.upload:compression_format") or "gzip" + compression_format = self._app.cache.new_config.get("core.upload:compression_format", + default="gzip") if compression_format == "gzip": compress_level_config = "core.gzip:compresslevel" package_file_name = PACKAGE_TGZ_NAME @@ -177,10 +178,10 @@ def _compress_package_files(self, layout, pref): package_file_name = PACKAGE_TZSTD_NAME package_file = os.path.join(download_pkg_folder, PACKAGE_TZSTD_NAME) else: - raise ConanException(f"Unsupported compression level '{compression_format}'") + raise ConanException(f"Unsupported compression format '{compression_format}'") if is_dirty(package_file): - self._output.warning("%s: Removing %s, marked as dirty" % (str(pref), package_file_name)) + self._output.warning(f"{pref}: Removing {package_file_name}, marked as dirty") os.remove(package_file) clean_dirty(package_file) @@ -272,31 +273,28 @@ def compress_files(files, name, dest_dir, compressformat=None, compresslevel=Non if compressformat == "zstd": with open(tar_path, "wb") as tarfile_obj: - def reset_tarinfo(tarinfo): - """ - Resets mtime in the tarinfo for consistency with - gzopen_without_timestamps() - """ - tarinfo.mtime = 0 - return tarinfo - # Only provide level if it was overridden by config. zstd_kwargs = {} if compresslevel is not None: zstd_kwargs["level"] = compresslevel - dctx = zstandard.ZstdCompressor(**zstd_kwargs) + dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) # Create a zstd stream writer so tarfile writes uncompressed data to # the zstd stream writer, which in turn writes compressed data to the # output tar.zst file. 
with dctx.stream_writer(tarfile_obj) as stream_writer: - with tarfile.open(mode="w|", fileobj=stream_writer, + with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=524288, format=tarfile.PAX_FORMAT) as tar: + unflushed_bytes = 0 for filename, abs_path in sorted(files.items()): - tar.add(abs_path, filename, recursive=False, filter=reset_tarinfo) + tar.add(abs_path, filename, recursive=False) + + unflushed_bytes += os.path.getsize(abs_path) + if unflushed_bytes >= 2097152: + stream_writer.flush() # Flush the current zstd block. + unflushed_bytes = 0 else: - # FIXME, better write to disk sequentially and not keep tgz contents in memory with set_dirty_context_manager(tar_path), open(tar_path, "wb") as tgz_handle: tgz = gzopen_without_timestamps(name, mode="w", fileobj=tgz_handle, compresslevel=compresslevel) diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 741ae408b0e..24e06ed51ff 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -2,7 +2,6 @@ import shutil import tarfile import time -import zstandard from typing import List from requests.exceptions import ConnectionError @@ -267,13 +266,8 @@ def uncompress_file(src_path, dest_folder, scope=None): ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}") with open(src_path, mode='rb') as file_handler: - if src_path.endswith(".tar.zst"): - dctx = zstandard.ZstdDecompressor() - stream_reader = dctx.stream_reader(file_handler) - with tarfile.open(fileobj=stream_reader, mode='r|') as the_tar: - the_tar.extractall(dest_folder) - else: - tar_extract(file_handler, dest_folder) + tar_extract(file_handler, dest_folder, + is_tar_zstd=src_path.endswith(".tar.zst")) except Exception as e: error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\ % (src_path, dest_folder, str(e)) diff --git a/conans/util/files.py b/conans/util/files.py index 0a7827315c5..2f6d628cb87 100644 --- a/conans/util/files.py +++ 
b/conans/util/files.py @@ -7,6 +7,7 @@ import stat import tarfile import time +import zstandard from contextlib import contextmanager @@ -278,13 +279,18 @@ def gzopen_without_timestamps(name, mode="r", fileobj=None, compresslevel=None, return t -def tar_extract(fileobj, destination_dir): - the_tar = tarfile.open(fileobj=fileobj) - # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't allow to - # "could not change modification time", with time=0 - # the_tar.errorlevel = 2 # raise exception if any error - the_tar.extractall(path=destination_dir) - the_tar.close() +def tar_extract(fileobj, destination_dir, is_tar_zst=False): + if is_tar_zst: + dctx = zstandard.ZstdDecompressor() + stream_reader = dctx.stream_reader(fileobj) + with tarfile.open(fileobj=stream_reader, bufsize=512000, mode='r|') as the_tar: + the_tar.extractall(path=destination_dir) + else: + with tarfile.open(fileobj=fileobj) as the_tar: + # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't allow to + # "could not change modification time", with time=0 + # the_tar.errorlevel = 2 # raise exception if any error + the_tar.extractall(path=destination_dir) def exception_message_safe(exc): From db87f56783eff6bd2570bdebc9138ba31a8e1952 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 22 Jul 2024 11:35:29 -0400 Subject: [PATCH 07/23] Add file missed by merge --- conan/internal/paths.py | 1 + 1 file changed, 1 insertion(+) diff --git a/conan/internal/paths.py b/conan/internal/paths.py index 34c85929f3f..2a00f3c31c3 100644 --- a/conan/internal/paths.py +++ b/conan/internal/paths.py @@ -82,6 +82,7 @@ def _user_home_from_conanrc_file(): CONAN_MANIFEST = "conanmanifest.txt" CONANINFO = "conaninfo.txt" PACKAGE_TGZ_NAME = "conan_package.tgz" +PACKAGE_TZSTD_NAME = "conan_package.tar.zst" EXPORT_TGZ_NAME = "conan_export.tgz" EXPORT_SOURCES_TGZ_NAME = "conan_sources.tgz" DATA_YML = "conandata.yml" From 6a109f4cce3db46d1759e58763f2e3a2cc2afd02 Mon Sep 17 
00:00:00 2001 From: Adam Gross Date: Mon, 22 Jul 2024 11:36:36 -0400 Subject: [PATCH 08/23] Fix typo in parameter which broke tests --- conans/client/remote_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 4bce6592bcb..32908ea43ee 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -287,7 +287,7 @@ def uncompress_file(src_path, dest_folder, scope=None): with open(src_path, mode='rb') as file_handler: tar_extract(file_handler, dest_folder, - is_tar_zstd=src_path.endswith(".tar.zst")) + is_tar_zst=src_path.endswith(".tar.zst")) except Exception as e: error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\ % (src_path, dest_folder, str(e)) From e5765e67b118d21c96fcccd2caf6995e5e9c6f8b Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 22 Jul 2024 12:27:19 -0400 Subject: [PATCH 09/23] A few more small fixes in hopes of unbreaking the build 1. Fix bad merge causing uploader.py change to still refer to `self._app.cache.new_config`, when now we are supposed to use `self._global_conf`. 2. Change two output calls in uploader.py to only output the package file basename to be consistent with other existing log lines. 3. Use double quotes instead of single quotes to be more consistent with existing code. 
--- conans/client/cmd/uploader.py | 6 +++--- conans/client/remote_manager.py | 1 - conans/model/conf.py | 2 +- conans/requirements.txt | 2 +- conans/util/files.py | 2 +- test/unittests/client/remote_manager_test.py | 6 +++--- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index 1ba4d90d6f7..b751c2ea2c7 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -189,7 +189,7 @@ def _compress_package_files(self, layout, pref): raise ConanException(f"Unsupported compression format '{compression_format}'") if is_dirty(package_file): - output.warning("Removing %s, marked as dirty" % PACKAGE_TGZ_NAME) + output.warning(f"Removing {package_file_name}, marked as dirty") os.remove(package_file) clean_dirty(package_file) @@ -213,10 +213,10 @@ def _compress_package_files(self, layout, pref): files.pop(CONAN_MANIFEST) if os.path.isfile(package_file): - output.info(f"Not writing '{package_file}' because it already exists.") + output.info(f"Not writing '{package_file_name}' because it already exists.") else: source_files = {f: path for f, path in files.items()} - compresslevel = self._app.cache.new_config.get(compress_level_config, check_type=int) + compresslevel = self._global_conf.get(compress_level_config, check_type=int) compressed_path = compress_files(source_files, package_file_name, download_pkg_folder, compresslevel=compresslevel, compressformat=compression_format, ref=pref) diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 32908ea43ee..6fe6f19c188 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -284,7 +284,6 @@ def uncompress_file(src_path, dest_folder, scope=None): if big_file: hs = human_size(filesize) ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}") - with open(src_path, mode='rb') as file_handler: tar_extract(file_handler, dest_folder, 
is_tar_zst=src_path.endswith(".tar.zst")) diff --git a/conans/model/conf.py b/conans/model/conf.py index af2863d672a..c7da0e40b5e 100644 --- a/conans/model/conf.py +++ b/conans/model/conf.py @@ -51,7 +51,7 @@ # Compression for `conan upload` "core.upload:compression_format": "The compression format used when uploading Conan packages. " "Possible values: 'zstd', 'gzip' (default=gzip)", - "core.gzip:compresslevel": "The Gzip compresion level for Conan artifacts (default=9)", + "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9)", "core.zstd:compresslevel": "The zstd compression level for Conan artifacts", # Excluded from revision_mode = "scm" dirty and Git().is_dirty() checks "core.scm:excluded": "List of excluded patterns for builtin git dirty checks", diff --git a/conans/requirements.txt b/conans/requirements.txt index 915b5b7492a..de6594b0273 100644 --- a/conans/requirements.txt +++ b/conans/requirements.txt @@ -7,4 +7,4 @@ fasteners>=0.15 distro>=1.4.0, <=1.8.0; platform_system == 'Linux' or platform_system == 'FreeBSD' Jinja2>=3.0, <4.0.0 python-dateutil>=2.8.0, <3 -zstandard>=0.20, <= 0.21 +zstandard>=0.20, <= 0.23 diff --git a/conans/util/files.py b/conans/util/files.py index c516d498860..905da198b91 100644 --- a/conans/util/files.py +++ b/conans/util/files.py @@ -286,7 +286,7 @@ def tar_extract(fileobj, destination_dir, is_tar_zst=False): if is_tar_zst: dctx = zstandard.ZstdDecompressor() stream_reader = dctx.stream_reader(fileobj) - with tarfile.open(fileobj=stream_reader, bufsize=512000, mode='r|') as the_tar: + with tarfile.open(fileobj=stream_reader, bufsize=512000, mode="r|") as the_tar: the_tar.extractall(path=destination_dir) else: with tarfile.open(fileobj=fileobj) as the_tar: diff --git a/test/unittests/client/remote_manager_test.py b/test/unittests/client/remote_manager_test.py index e90a42b9a17..72d4d392aea 100644 --- a/test/unittests/client/remote_manager_test.py +++ b/test/unittests/client/remote_manager_test.py 
@@ -35,12 +35,12 @@ def test_compress_and_uncompress_zst_files(self): "Two_file.txt": os.path.join(folder, "Two_file.txt"), } - path = compress_files(files, PACKAGE_TZSTD_NAME, dest_dir=folder, compressformat='zstd') + path = compress_files(files, PACKAGE_TZSTD_NAME, dest_dir=folder, compressformat="zstd") self.assertTrue(os.path.exists(path)) expected_path = os.path.join(folder, PACKAGE_TZSTD_NAME) self.assertEqual(path, expected_path) - extract_dir = os.path.join(folder, 'extracted') + extract_dir = os.path.join(folder, "extracted") uncompress_file(path, extract_dir) extract_files = list(sorted(os.listdir(extract_dir))) @@ -49,5 +49,5 @@ def test_compress_and_uncompress_zst_files(self): for name, path in sorted(files.items()): extract_path = os.path.join(extract_dir, name) - with open(path, 'r') as f1, open(extract_path, 'r') as f2: + with open(path, "r") as f1, open(extract_path, "r") as f2: self.assertEqual(f1.read(), f2.read()) From ff29efc3bfbc8e6f7e2aaf4f4858727fec86c581 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Tue, 23 Jul 2024 13:26:20 -0400 Subject: [PATCH 10/23] Some more improvements 1. Downgrade bufsize to 32KB because that performs well for compression and decompression. The values don't need to be the same, but it happened to be the best value in both compression and decompression tests. 2. Use a context manager for stream_reader as I do for stream_writer. 3. Add some comments about the bufsize value. --- conans/client/cmd/uploader.py | 6 +++++- conans/util/files.py | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index b751c2ea2c7..b63e65ef051 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -290,7 +290,11 @@ def compress_files(files, name, dest_dir, compressformat=None, compresslevel=Non # the zstd stream writer, which in turn writes compressed data to the # output tar.zst file. 
with dctx.stream_writer(tarfile_obj) as stream_writer: - with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=524288, + # The choice of bufsize=32768 comes from profiling compression at various + # values and finding that bufsize value consistently performs well. + # The variance in compression times at bufsize<=64KB is small. It is only + # when bufsize>=128KB that compression times start increasing. + with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, format=tarfile.PAX_FORMAT) as tar: unflushed_bytes = 0 for filename, abs_path in sorted(files.items()): diff --git a/conans/util/files.py b/conans/util/files.py index 905da198b91..c86ce490bbb 100644 --- a/conans/util/files.py +++ b/conans/util/files.py @@ -285,9 +285,11 @@ def gzopen_without_timestamps(name, mode="r", fileobj=None, compresslevel=None, def tar_extract(fileobj, destination_dir, is_tar_zst=False): if is_tar_zst: dctx = zstandard.ZstdDecompressor() - stream_reader = dctx.stream_reader(fileobj) - with tarfile.open(fileobj=stream_reader, bufsize=512000, mode="r|") as the_tar: - the_tar.extractall(path=destination_dir) + with dctx.stream_reader(fileobj) as stream_reader: + # The choice of bufsize=32768 comes from profiling decompression at various + # values and finding that bufsize value consistently performs well. + with tarfile.open(fileobj=stream_reader, bufsize=32768, mode="r|") as the_tar: + the_tar.extractall(path=destination_dir) else: with tarfile.open(fileobj=fileobj) as the_tar: # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't allow to From 0c58aa8a793c305cdd671d734709e0d78caf4683 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 16 Sep 2024 13:40:20 -0400 Subject: [PATCH 11/23] Address some of the review feedback Still need to do some testing though. 
--- conan/internal/paths.py | 2 +- conans/client/cmd/uploader.py | 38 ++++------------------------- conans/client/remote_manager.py | 2 +- conans/model/conf.py | 2 +- conans/requirements.txt | 1 - conans/util/files.py | 42 ++++++++++++++++++++++++++++++++- 6 files changed, 49 insertions(+), 38 deletions(-) diff --git a/conan/internal/paths.py b/conan/internal/paths.py index 2a00f3c31c3..acce04ab49a 100644 --- a/conan/internal/paths.py +++ b/conan/internal/paths.py @@ -82,7 +82,7 @@ def _user_home_from_conanrc_file(): CONAN_MANIFEST = "conanmanifest.txt" CONANINFO = "conaninfo.txt" PACKAGE_TGZ_NAME = "conan_package.tgz" -PACKAGE_TZSTD_NAME = "conan_package.tar.zst" +PACKAGE_TZSTD_NAME = "conan_package.tzst" EXPORT_TGZ_NAME = "conan_export.tgz" EXPORT_SOURCES_TGZ_NAME = "conan_sources.tgz" DATA_YML = "conandata.yml" diff --git a/conans/client/cmd/uploader.py b/conans/client/cmd/uploader.py index b63e65ef051..2140837cff5 100644 --- a/conans/client/cmd/uploader.py +++ b/conans/client/cmd/uploader.py @@ -2,7 +2,6 @@ import shutil import tarfile import time -import zstandard from conan.internal.conan_app import ConanApp from conan.api.output import ConanOutput @@ -12,7 +11,7 @@ EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, CONANINFO) from conans.util.files import (clean_dirty, is_dirty, gather_files, gzopen_without_timestamps, set_dirty_context_manager, mkdir, - human_size) + human_size, tar_zst_compress) UPLOAD_POLICY_FORCE = "force-upload" UPLOAD_POLICY_SKIP = "skip-upload" @@ -212,12 +211,10 @@ def _compress_package_files(self, layout, pref): files.pop(CONANINFO) files.pop(CONAN_MANIFEST) - if os.path.isfile(package_file): - output.info(f"Not writing '{package_file_name}' because it already exists.") - else: - source_files = {f: path for f, path in files.items()} + if not os.path.isfile(package_file): + tgz_files = {f: path for f, path in files.items()} compresslevel = self._global_conf.get(compress_level_config, check_type=int) - compressed_path = 
compress_files(source_files, package_file_name, download_pkg_folder, + compressed_path = compress_files(tgz_files, package_file_name, download_pkg_folder, compresslevel=compresslevel, compressformat=compression_format, ref=pref) @@ -278,32 +275,7 @@ def compress_files(files, name, dest_dir, compressformat=None, compresslevel=Non ConanOutput(scope=str(ref)).info(f"Compressing {name}") if compressformat == "zstd": - with open(tar_path, "wb") as tarfile_obj: - # Only provide level if it was overridden by config. - zstd_kwargs = {} - if compresslevel is not None: - zstd_kwargs["level"] = compresslevel - - dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) - - # Create a zstd stream writer so tarfile writes uncompressed data to - # the zstd stream writer, which in turn writes compressed data to the - # output tar.zst file. - with dctx.stream_writer(tarfile_obj) as stream_writer: - # The choice of bufsize=32768 comes from profiling compression at various - # values and finding that bufsize value consistently performs well. - # The variance in compression times at bufsize<=64KB is small. It is only - # when bufsize>=128KB that compression times start increasing. - with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, - format=tarfile.PAX_FORMAT) as tar: - unflushed_bytes = 0 - for filename, abs_path in sorted(files.items()): - tar.add(abs_path, filename, recursive=False) - - unflushed_bytes += os.path.getsize(abs_path) - if unflushed_bytes >= 2097152: - stream_writer.flush() # Flush the current zstd block. 
- unflushed_bytes = 0 + tar_zst_compress(tar_path, files, compresslevel=compresslevel) else: with set_dirty_context_manager(tar_path), open(tar_path, "wb") as tgz_handle: tgz = gzopen_without_timestamps(name, mode="w", fileobj=tgz_handle, diff --git a/conans/client/remote_manager.py b/conans/client/remote_manager.py index 6fe6f19c188..633b94418f4 100644 --- a/conans/client/remote_manager.py +++ b/conans/client/remote_manager.py @@ -286,7 +286,7 @@ def uncompress_file(src_path, dest_folder, scope=None): ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}") with open(src_path, mode='rb') as file_handler: tar_extract(file_handler, dest_folder, - is_tar_zst=src_path.endswith(".tar.zst")) + is_tar_zst=src_path.endswith((".tar.zst", ".tzst"))) except Exception as e: error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\ % (src_path, dest_folder, str(e)) diff --git a/conans/model/conf.py b/conans/model/conf.py index c7da0e40b5e..cf6eb3c77a9 100644 --- a/conans/model/conf.py +++ b/conans/model/conf.py @@ -52,7 +52,7 @@ "core.upload:compression_format": "The compression format used when uploading Conan packages. " "Possible values: 'zstd', 'gzip' (default=gzip)", "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9)", - "core.zstd:compresslevel": "The zstd compression level for Conan artifacts", + "core.zstd:compresslevel": "The zstd compression level for Conan artifacts (default=3)", # Excluded from revision_mode = "scm" dirty and Git().is_dirty() checks "core.scm:excluded": "List of excluded patterns for builtin git dirty checks", "core.scm:local_url": "By default allows to store local folders as remote url, but not upload them. 
Use 'allow' for allowing upload and 'block' to completely forbid it", diff --git a/conans/requirements.txt b/conans/requirements.txt index de6594b0273..165f71c654e 100644 --- a/conans/requirements.txt +++ b/conans/requirements.txt @@ -7,4 +7,3 @@ fasteners>=0.15 distro>=1.4.0, <=1.8.0; platform_system == 'Linux' or platform_system == 'FreeBSD' Jinja2>=3.0, <4.0.0 python-dateutil>=2.8.0, <3 -zstandard>=0.20, <= 0.23 diff --git a/conans/util/files.py b/conans/util/files.py index c86ce490bbb..7c0bf48544f 100644 --- a/conans/util/files.py +++ b/conans/util/files.py @@ -8,7 +8,12 @@ import sys import tarfile import time -import zstandard + +try: + import zstandard + zstandard_exception = None +except ImportError as e: + zstandard_exception = e from contextlib import contextmanager @@ -282,8 +287,43 @@ def gzopen_without_timestamps(name, mode="r", fileobj=None, compresslevel=None, return t +def raise_if_zstandard_not_present(operation): + if zstandard_exception: + raise ConanException( + f"zstandard {operation} was requested, but the required package is not present. " + f"Please install it using 'pip install zstandard' and try again. " + f"Exception details: {zstandard_exception}") + + +def tar_zst_compress(tar_path, files, compresslevel=None): + raise_if_zstandard_not_present("compression") + + with open(tar_path, "wb") as tarfile_obj: + # Only provide level if it was overridden by config. + zstd_kwargs = {} + if compresslevel is not None: + zstd_kwargs["level"] = compresslevel + + dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) + + # Create a zstd stream writer so tarfile writes uncompressed data to + # the zstd stream writer, which in turn writes compressed data to the + # output tar.zst file. + with dctx.stream_writer(tarfile_obj) as stream_writer: + # The choice of bufsize=32768 comes from profiling compression at various + # values and finding that bufsize value consistently performs well. 
+ # The variance in compression times at bufsize<=64KB is small. It is only + # when bufsize>=128KB that compression times start increasing. + with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, + format=tarfile.PAX_FORMAT) as tar: + for filename, abs_path in sorted(files.items()): + tar.add(abs_path, filename, recursive=False) + + def tar_extract(fileobj, destination_dir, is_tar_zst=False): if is_tar_zst: + raise_if_zstandard_not_present("decompression") + dctx = zstandard.ZstdDecompressor() with dctx.stream_reader(fileobj) as stream_reader: # The choice of bufsize=32768 comes from profiling decompression at various From 79afdaecc0a28caba38d74a88298d361cc6a5f94 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Tue, 17 Sep 2024 15:26:54 -0400 Subject: [PATCH 12/23] Flush zstd frames around every 128MB --- conans/util/files.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conans/util/files.py b/conans/util/files.py index 7c0bf48544f..84bfb713790 100644 --- a/conans/util/files.py +++ b/conans/util/files.py @@ -316,9 +316,18 @@ def tar_zst_compress(tar_path, files, compresslevel=None): # when bufsize>=128KB that compression times start increasing. with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, format=tarfile.PAX_FORMAT) as tar: + current_frame_bytes = 0 for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, recursive=False) + # Flush the current frame if it has reached a large enough size. + # There is no required size, but 128MB is a good starting point + # because it allows for faster random access to the file. 
+ current_frame_bytes += os.path.getsize(abs_path) + if current_frame_bytes >= 134217728: + stream_writer.flush(zstandard.FLUSH_FRAME) + current_frame_bytes = 0 + def tar_extract(fileobj, destination_dir, is_tar_zst=False): if is_tar_zst: From b8f4a5799ebdf3016c75e5e1e64e0664f3001264 Mon Sep 17 00:00:00 2001 From: Adam Gross Date: Mon, 23 Sep 2024 12:26:18 -0400 Subject: [PATCH 13/23] Fix DeprecationWarning Newer Python has this warning: DeprecationWarning: Python 3.14 will, by default, filter extracted tar archives and reject files or modify their metadata. Use the filter argument to control this behavior --- conans/util/files.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conans/util/files.py b/conans/util/files.py index 2ff1ce8f504..e49f24950b9 100644 --- a/conans/util/files.py +++ b/conans/util/files.py @@ -348,13 +348,15 @@ def tar_extract(fileobj, destination_dir, is_tar_zst=False): # The choice of bufsize=32768 comes from profiling decompression at various # values and finding that bufsize value consistently performs well. 
with tarfile.open(fileobj=stream_reader, bufsize=32768, mode="r|") as the_tar: - the_tar.extractall(path=destination_dir) + the_tar.extractall(path=destination_dir, + filter=lambda tarinfo, _: tarinfo) else: with tarfile.open(fileobj=fileobj) as the_tar: # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't allow to # "could not change modification time", with time=0 # the_tar.errorlevel = 2 # raise exception if any error - the_tar.extractall(path=destination_dir) + the_tar.extractall(path=destination_dir, + filter=lambda tarinfo, _: tarinfo) def exception_message_safe(exc): From ff9cfa32822d778e85d50316f4c16bc60d06793a Mon Sep 17 00:00:00 2001 From: memsharded Date: Mon, 1 Dec 2025 20:03:19 +0100 Subject: [PATCH 14/23] wip --- conan/internal/api/uploader.py | 118 ++++++++++--------- conan/internal/model/conf.py | 4 +- conan/internal/model/manifest.py | 9 +- conan/internal/paths.py | 1 - conan/internal/rest/remote_manager.py | 21 ++-- conan/internal/rest/rest_client_v2.py | 13 +- conan/internal/util/files.py | 48 -------- conan/internal/util/zstd.py | 52 ++++++++ test/integration/test_compressions.py | 17 +++ test/unittests/client/remote_manager_test.py | 37 +++++- 10 files changed, 187 insertions(+), 133 deletions(-) create mode 100644 conan/internal/util/zstd.py create mode 100644 test/integration/test_compressions.py diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 9011998f1c9..1b9322b8901 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -10,11 +10,10 @@ from conan.internal.source import retrieve_exports_sources from conan.internal.errors import NotFoundException from conan.errors import ConanException -from conan.internal.paths import (CONAN_MANIFEST, CONANFILE, EXPORT_SOURCES_TGZ_NAME, - EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, CONANINFO, PACKAGE_TZSTD_NAME) +from conan.internal.paths import CONAN_MANIFEST, CONANFILE, CONANINFO from conan.internal.util.files import 
(clean_dirty, is_dirty, gather_files, - set_dirty_context_manager, mkdir, human_size, - tar_zst_compress) + set_dirty_context_manager, mkdir, human_size) +from conan.internal.util.zstd import tar_zst_compress UPLOAD_POLICY_FORCE = "force-upload" UPLOAD_POLICY_SKIP = "skip-upload" @@ -129,14 +128,6 @@ def _prepare_recipe(self, ref, ref_bundle, conanfile, remotes): def _compress_recipe_files(self, layout, ref): download_export_folder = layout.download_export() - output = ConanOutput(scope=str(ref)) - for f in (EXPORT_TGZ_NAME, EXPORT_SOURCES_TGZ_NAME): - tgz_path = os.path.join(download_export_folder, f) - if is_dirty(tgz_path): - output.warning("Removing %s, marked as dirty" % f) - os.remove(tgz_path) - clean_dirty(tgz_path) - export_folder = layout.export() files, symlinked_folders = gather_files(export_folder) files.update(symlinked_folders) @@ -160,18 +151,12 @@ def _compress_recipe_files(self, layout, ref): files.pop(CONANFILE) files.pop(CONAN_MANIFEST) - def add_tgz(tgz_name, tgz_files): - tgz = os.path.join(download_export_folder, tgz_name) - if os.path.isfile(tgz): - result[tgz_name] = tgz - elif tgz_files: - compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int) - tgz = compress_files(tgz_files, tgz_name, download_export_folder, - compresslevel=compresslevel, ref=ref) - result[tgz_name] = tgz - - add_tgz(EXPORT_TGZ_NAME, files) - add_tgz(EXPORT_SOURCES_TGZ_NAME, src_files) + if files: + comp = self._compressed_file("conan_export", files, download_export_folder, ref) + result[comp] = os.path.join(download_export_folder, comp) + if src_files: + comp = self._compressed_file("conan_sources", src_files, download_export_folder, ref) + result[comp] = os.path.join(download_export_folder, comp) return result def _prepare_package(self, pref, prev_bundle): @@ -182,28 +167,50 @@ def _prepare_package(self, pref, prev_bundle): cache_files = self._compress_package_files(pkg_layout, pref) prev_bundle["files"] = cache_files + def 
_compressed_file(self, filename, files, download_folder, ref): + output = ConanOutput(scope=str(ref)) + formats = {"zstd": ".tzst", + "xz": ".txz", + "gzip": ".tgz"} + + # Check if there is some existing compressed file already + matches = [] + for extension in formats.values(): + file_name = filename + extension + package_file = os.path.join(download_folder, file_name) + if is_dirty(package_file): + output.warning(f"Removing {file_name}, marked as dirty") + os.remove(package_file) + clean_dirty(package_file) + if os.path.isfile(package_file): + matches.append(file_name) + if len(matches) > 1: + raise ConanException(f"{ref}: Multiple package files found for {filename}: {matches}") + if len(matches) == 1: + return matches[0] + + # No compressed file exists, need to compress + compressformat = self._global_conf.get("core.upload:compression_format", + default="gzip", choices=("zstd", "xz", "gzip")) + compresslevel = self._global_conf.get("core:compresslevel", check_type=int) + if compresslevel is None and compressformat == "gzip": + compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int) + if compresslevel is not None: + ConanOutput().warning("core.gzip:compresslevel is deprecated, " + "use core.compresslevel instead", warn_tag="deprecated") + + file_name = filename + formats[compressformat] + package_file = os.path.join(download_folder, file_name) + compressed_path = compress_files(files, file_name, download_folder, + compresslevel=compresslevel, compressformat=compressformat, + ref=ref) + assert compressed_path == package_file + assert os.path.exists(package_file) + return file_name + def _compress_package_files(self, layout, pref): - output = ConanOutput(scope=str(pref)) download_pkg_folder = layout.download_package() - compression_format = self._global_conf.get("core.upload:compression_format", - default="gzip") - if compression_format == "gzip": - compress_level_config = "core.gzip:compresslevel" - package_file_name = PACKAGE_TGZ_NAME - 
package_file = os.path.join(download_pkg_folder, PACKAGE_TGZ_NAME) - elif compression_format == "zstd": - compress_level_config = "core.zstd:compresslevel" - package_file_name = PACKAGE_TZSTD_NAME - package_file = os.path.join(download_pkg_folder, PACKAGE_TZSTD_NAME) - else: - raise ConanException(f"Unsupported compression format '{compression_format}'") - - if is_dirty(package_file): - output.warning(f"Removing {package_file_name}, marked as dirty") - os.remove(package_file) - clean_dirty(package_file) - # Get all the files in that directory # existing package package_folder = layout.package() @@ -223,17 +230,8 @@ def _compress_package_files(self, layout, pref): files.pop(CONANINFO) files.pop(CONAN_MANIFEST) - if not os.path.isfile(package_file): - tgz_files = {f: path for f, path in files.items()} - compresslevel = self._global_conf.get(compress_level_config, check_type=int) - compressed_path = compress_files(tgz_files, package_file_name, download_pkg_folder, - compresslevel=compresslevel, compressformat=compression_format, - ref=pref) - - assert compressed_path == package_file - assert os.path.exists(package_file) - - return {package_file_name: package_file, + compressed_file = self._compressed_file("conan_package", files, download_pkg_folder, pref) + return {compressed_file: os.path.join(download_pkg_folder, compressed_file), CONANINFO: os.path.join(download_pkg_folder, CONANINFO), CONAN_MANIFEST: os.path.join(download_pkg_folder, CONAN_MANIFEST)} @@ -303,12 +301,20 @@ def compress_files(files, name, dest_dir, compressformat=None, compresslevel=Non t1 = time.time() # FIXME, better write to disk sequentially and not keep tgz contents in memory tgz_path = os.path.join(dest_dir, name) + if ref: + ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") + if compressformat == "zstd": tar_zst_compress(tgz_path, files, compresslevel=compresslevel) return - if ref: - ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") + if 
compressformat == "xz": + with tarfile.open(tgz_path, "w:xz") as tar: + for filename, abs_path in sorted(files.items()): + # recursive is False by default in case it is a symlink to a folder + tar.add(abs_path, filename, recursive=recursive) + return tgz_path + with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle: tgz = gzopen_without_timestamps(name, fileobj=tgz_handle, compresslevel=compresslevel) for filename, abs_path in sorted(files.items()): diff --git a/conan/internal/model/conf.py b/conan/internal/model/conf.py index ba83fba4671..1e51a486551 100644 --- a/conan/internal/model/conf.py +++ b/conan/internal/model/conf.py @@ -62,9 +62,9 @@ "core.net.http:clean_system_proxy": "If defined, the proxies system env-vars will be discarded", # Compression for `conan upload` "core.upload:compression_format": "The compression format used when uploading Conan packages. " - "Possible values: 'zstd', 'gzip' (default=gzip)", + "Possible values: 'zstd', 'xz', 'gzip' (default=gzip)", "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9)", - "core.zstd:compresslevel": "The zstd compression level for Conan artifacts (default=3)", + "core:compresslevel": "The compression level for Conan artifacts (default zstd=3, gz=9)", # Excluded from revision_mode = "scm" dirty and Git().is_dirty() checks "core.scm:excluded": "List of excluded patterns for builtin git dirty checks", "core.scm:local_url": "By default allows to store local folders as remote url, but not upload them. 
Use 'allow' for allowing upload and 'block' to completely forbid it", diff --git a/conan/internal/model/manifest.py b/conan/internal/model/manifest.py index 4b01a894ef3..bd49135d3cf 100644 --- a/conan/internal/model/manifest.py +++ b/conan/internal/model/manifest.py @@ -1,8 +1,7 @@ import os from collections import defaultdict -from conan.internal.paths import CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, \ - PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME +from conan.internal.paths import CONAN_MANIFEST from conan.internal.util.dates import timestamp_now, timestamp_to_str from conan.internal.util.files import load, md5, md5sum, save, gather_files @@ -92,8 +91,10 @@ def create(cls, folder, exports_sources_folder=None): """ files, _ = gather_files(folder) # The folders symlinks are discarded for the manifest - for f in (PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME, EXPORT_TGZ_NAME, CONAN_MANIFEST, EXPORT_SOURCES_TGZ_NAME): - files.pop(f, None) + for f in ("conan_package", "conan_export", "conan_sources"): + for e in (".gz", ".xz", ".tzst"): + files.pop(f + e, None) + files.pop(CONAN_MANIFEST, None) file_dict = {} for name, filepath in files.items(): diff --git a/conan/internal/paths.py b/conan/internal/paths.py index 865047a2f2d..084c35dfd84 100644 --- a/conan/internal/paths.py +++ b/conan/internal/paths.py @@ -87,7 +87,6 @@ def _user_home_from_conanrc_file(): CONAN_MANIFEST = "conanmanifest.txt" CONANINFO = "conaninfo.txt" PACKAGE_TGZ_NAME = "conan_package.tgz" -PACKAGE_TZSTD_NAME = "conan_package.tzst" EXPORT_TGZ_NAME = "conan_export.tgz" EXPORT_SOURCES_TGZ_NAME = "conan_sources.tgz" DATA_YML = "conandata.yml" diff --git a/conan/internal/rest/remote_manager.py b/conan/internal/rest/remote_manager.py index 9187714f84e..8c50148e523 100644 --- a/conan/internal/rest/remote_manager.py +++ b/conan/internal/rest/remote_manager.py @@ -1,6 +1,5 @@ import os import shutil -import time from collections import namedtuple from typing import List @@ -19,8 +18,7 @@ from conan.api.model 
import PkgReference from conan.api.model import RecipeReference from conan.internal.util.files import rmdir, human_size -from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, \ - PACKAGE_TZSTD_NAME +from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME from conan.internal.util.files import mkdir, tar_extract @@ -184,18 +182,16 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): for f in ("conaninfo.txt", "conanmanifest.txt"): if f not in zipped_files: raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {f}") - accepted_package_files = [PACKAGE_TZSTD_NAME, PACKAGE_TGZ_NAME] - package_file = next((f for f in zipped_files if f in accepted_package_files), None) + + package_file = next((f for f in zipped_files if "conan_package" in f), None) if not package_file: - raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {accepted_package_files} found") + raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: " + f"no conan_package found") self._signer.verify(pref, download_pkg_folder, zipped_files) - package_file = zipped_files.pop(package_file, None) + tgz_file = zipped_files.pop(package_file, None) package_folder = layout.package() - t1 = time.time() - uncompress_file(package_file, package_folder, scope=str(pref.ref)) - duration = time.time() - t1 - scoped_output.debug(f"Decompressed {package_file} in {duration} seconds") + uncompress_file(tgz_file, package_folder, scope=str(pref.ref)) mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing for file_name, file_path in zipped_files.items(): # copy CONANINFO and CONANMANIFEST shutil.move(file_path, os.path.join(package_folder, file_name)) @@ -354,8 +350,7 @@ def uncompress_file(src_path, dest_folder, scope=None): hs = human_size(filesize) ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}") with open(src_path, mode='rb') as file_handler: - 
tar_extract(file_handler, dest_folder, - is_tar_zst=src_path.endswith((".tar.zst", ".tzst"))) + tar_extract(file_handler, dest_folder) except Exception as e: error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\ % (src_path, dest_folder, str(e)) diff --git a/conan/internal/rest/rest_client_v2.py b/conan/internal/rest/rest_client_v2.py index 0aefbf0faca..db3681ee9fa 100644 --- a/conan/internal/rest/rest_client_v2.py +++ b/conan/internal/rest/rest_client_v2.py @@ -17,7 +17,7 @@ RecipeNotFoundException, PackageNotFoundException, EXCEPTION_CODE_MAPPING from conan.errors import ConanException from conan.api.model import PkgReference -from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, PACKAGE_TZSTD_NAME, PACKAGE_TGZ_NAME +from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME from conan.api.model import RecipeReference from conan.internal.util.dates import from_iso8601_to_timestamp @@ -274,14 +274,15 @@ def get_package(self, pref, dest_folder, metadata, only_metadata): data = self._get_file_list_json(url) server_files = data["files"] result = {} + pkg_files = [f for f in server_files if f.startswith("conan_package.")] + if len(pkg_files) > 1: + raise ConanException(f"Package {pref} is corrupted in the server, it contains " + f"more than one package file: {pkg_files}") # Download only known files, but not metadata (except sign) if not only_metadata: # Retrieve package first, then metadata - accepted_package_files = [PACKAGE_TZSTD_NAME, PACKAGE_TGZ_NAME] accepted_files = ["conaninfo.txt", "conanmanifest.txt", "metadata/sign"] - for f in accepted_package_files: - if f in server_files: - accepted_files = [f] + accepted_files - break + if len(pkg_files) == 1: + accepted_files.append(pkg_files[0]) files = [f for f in server_files if any(f.startswith(m) for m in accepted_files)] # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.package_file(pref, fn) for fn in files} diff --git 
a/conan/internal/util/files.py b/conan/internal/util/files.py index 53539a26199..f7b0d3e186f 100644 --- a/conan/internal/util/files.py +++ b/conan/internal/util/files.py @@ -8,12 +8,6 @@ import tarfile import time -try: - import zstandard - zstandard_exception = None -except ImportError as e: - zstandard_exception = e - from contextlib import contextmanager from conan.errors import ConanException @@ -261,48 +255,6 @@ def mkdir(path): os.makedirs(path) -def raise_if_zstandard_not_present(operation): - if zstandard_exception: - raise ConanException( - f"zstandard {operation} was requested, but the required package is not present. " - f"Please install it using 'pip install zstandard' and try again. " - f"Exception details: {zstandard_exception}") - - -def tar_zst_compress(tar_path, files, compresslevel=None): - raise_if_zstandard_not_present("compression") - - with open(tar_path, "wb") as tarfile_obj: - # Only provide level if it was overridden by config. - zstd_kwargs = {} - if compresslevel is not None: - zstd_kwargs["level"] = compresslevel - - dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) - - # Create a zstd stream writer so tarfile writes uncompressed data to - # the zstd stream writer, which in turn writes compressed data to the - # output tar.zst file. - with dctx.stream_writer(tarfile_obj) as stream_writer: - # The choice of bufsize=32768 comes from profiling compression at various - # values and finding that bufsize value consistently performs well. - # The variance in compression times at bufsize<=64KB is small. It is only - # when bufsize>=128KB that compression times start increasing. - with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, - format=tarfile.PAX_FORMAT) as tar: - current_frame_bytes = 0 - for filename, abs_path in sorted(files.items()): - tar.add(abs_path, filename, recursive=False) - - # Flush the current frame if it has reached a large enough size. 
- # There is no required size, but 128MB is a good starting point - # because it allows for faster random access to the file. - current_frame_bytes += os.path.getsize(abs_path) - if current_frame_bytes >= 134217728: - stream_writer.flush(zstandard.FLUSH_FRAME) - current_frame_bytes = 0 - - def tar_extract(fileobj, destination_dir, is_tar_zst=False): if is_tar_zst: raise_if_zstandard_not_present("decompression") diff --git a/conan/internal/util/zstd.py b/conan/internal/util/zstd.py new file mode 100644 index 00000000000..3e6f2243eaa --- /dev/null +++ b/conan/internal/util/zstd.py @@ -0,0 +1,52 @@ +import os +import tarfile + +from conan.errors import ConanException + +try: + import zstandard + zstandard_exception = None +except ImportError as e: + zstandard_exception = e + + +def raise_if_zstandard_not_present(operation): + if zstandard_exception: + raise ConanException( + f"zstandard {operation} was requested, but the required package is not present. " + f"Please install it using 'pip install zstandard' and try again. " + f"Exception details: {zstandard_exception}") + + +def tar_zst_compress(tar_path, files, compresslevel=None): + raise_if_zstandard_not_present("compression") + + with open(tar_path, "wb") as tarfile_obj: + # Only provide level if it was overridden by config. + zstd_kwargs = {} + if compresslevel is not None: + zstd_kwargs["level"] = compresslevel + + dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) + + # Create a zstd stream writer so tarfile writes uncompressed data to + # the zstd stream writer, which in turn writes compressed data to the + # output tar.zst file. + with dctx.stream_writer(tarfile_obj) as stream_writer: + # The choice of bufsize=32768 comes from profiling compression at various + # values and finding that bufsize value consistently performs well. + # The variance in compression times at bufsize<=64KB is small. It is only + # when bufsize>=128KB that compression times start increasing. 
+ with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, + format=tarfile.PAX_FORMAT) as tar: + current_frame_bytes = 0 + for filename, abs_path in sorted(files.items()): + tar.add(abs_path, filename, recursive=False) + + # Flush the current frame if it has reached a large enough size. + # There is no required size, but 128MB is a good starting point + # because it allows for faster random access to the file. + current_frame_bytes += os.path.getsize(abs_path) + if current_frame_bytes >= 134217728: + stream_writer.flush(zstandard.FLUSH_FRAME) + current_frame_bytes = 0 diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py new file mode 100644 index 00000000000..61cb302b383 --- /dev/null +++ b/test/integration/test_compressions.py @@ -0,0 +1,17 @@ +from conan.test.utils.tools import TestClient + + +def test_xz(): + c = TestClient(default_server_user=True) + c.save_home({"global.conf": "core.upload:compression_format=xz"}) + c.run("new header_lib") + c.run("create -tf=") + c.run("upload * -r=default -c") + print(c.out) + c.run("remove * -c") + c.run("install --requires=mypkg/0.1") + print(c.out) + c.run("cache path mypkg/0.1") + print(c.out) + c.run("cache path mypkg/0.1:da39a3ee5e6b4b0d3255bfef95601890afd80709") + print(c.out) diff --git a/test/unittests/client/remote_manager_test.py b/test/unittests/client/remote_manager_test.py index fd1a1327e10..88cc3f9df1b 100644 --- a/test/unittests/client/remote_manager_test.py +++ b/test/unittests/client/remote_manager_test.py @@ -1,7 +1,10 @@ import os +import sys + +import pytest from conan.internal.api.uploader import compress_files -from conan.internal.paths import PACKAGE_TGZ_NAME, PACKAGE_TZSTD_NAME +from conan.internal.paths import PACKAGE_TGZ_NAME from conan.internal.rest.remote_manager import uncompress_file from conan.test.utils.test_files import temp_folder from conan.internal.util.files import save @@ -24,6 +27,34 @@ def test_compress_files_tgz(self): expected_path = 
os.path.join(folder, PACKAGE_TGZ_NAME) assert path == expected_path + def test_compress_and_uncompress_xz_files(self): + folder = temp_folder() + save(os.path.join(folder, "one_file.txt"), "The contents") + save(os.path.join(folder, "Two_file.txt"), "Two contents") + + files = { + "one_file.txt": os.path.join(folder, "one_file.txt"), + "Two_file.txt": os.path.join(folder, "Two_file.txt"), + } + + path = compress_files(files, "conan_package.txz", dest_dir=folder, compressformat="xz") + assert os.path.exists(path) + expected_path = os.path.join(folder, "conan_package.txz") + assert path == expected_path + + extract_dir = os.path.join(folder, "extracted") + uncompress_file(path, extract_dir) + + extract_files = list(sorted(os.listdir(extract_dir))) + expected_files = sorted(files.keys()) + assert extract_files == expected_files + + for name, path in files.items(): + extract_path = os.path.join(extract_dir, name) + with open(path, "r") as f1, open(extract_path, "r") as f2: + assert f1.read() == f2.read() + + @pytest.mark.skipif(sys.version_info.minor < 14, reason="zstd needs Python >= 3.14") def test_compress_and_uncompress_zst_files(self): folder = temp_folder() save(os.path.join(folder, "one_file.txt"), "The contents") @@ -34,9 +65,9 @@ def test_compress_and_uncompress_zst_files(self): "Two_file.txt": os.path.join(folder, "Two_file.txt"), } - path = compress_files(files, PACKAGE_TZSTD_NAME, dest_dir=folder, compressformat="zstd") + path = compress_files(files, "conan_package.tzst", dest_dir=folder, compressformat="zstd") assert os.path.exists(path) - expected_path = os.path.join(folder, PACKAGE_TZSTD_NAME) + expected_path = os.path.join(folder, "conan_package.tzst") assert path == expected_path extract_dir = os.path.join(folder, "extracted") From d438303d0b036185a718a96ec04048ba91b5ec08 Mon Sep 17 00:00:00 2001 From: memsharded Date: Tue, 2 Dec 2025 14:12:53 +0100 Subject: [PATCH 15/23] wip --- conan/internal/api/uploader.py | 33 ++++---- 
conan/internal/model/manifest.py | 7 +- conan/internal/paths.py | 7 +- conan/internal/rest/remote_manager.py | 12 +-- conan/internal/rest/rest_client_v2.py | 13 +-- conan/internal/util/files.py | 14 +--- conan/internal/util/zstd.py | 52 ------------ conan/test/utils/test_files.py | 2 +- .../integration/command/upload/upload_test.py | 7 +- test/integration/symlinks/symlinks_test.py | 3 +- test/integration/test_compressions.py | 79 ++++++++++++++++--- test/integration/tgz_macos_dot_files_test.py | 3 +- test/unittests/client/remote_manager_test.py | 5 +- 13 files changed, 115 insertions(+), 122 deletions(-) delete mode 100644 conan/internal/util/zstd.py diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 1b9322b8901..7cf63b5f0d0 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -10,10 +10,10 @@ from conan.internal.source import retrieve_exports_sources from conan.internal.errors import NotFoundException from conan.errors import ConanException -from conan.internal.paths import CONAN_MANIFEST, CONANFILE, CONANINFO +from conan.internal.paths import CONAN_MANIFEST, CONANFILE, CONANINFO, COMPRESSIONS, \ + EXPORT_SOURCES_FILE_NAME, EXPORT_FILE_NAME, PACKAGE_FILE_NAME from conan.internal.util.files import (clean_dirty, is_dirty, gather_files, set_dirty_context_manager, mkdir, human_size) -from conan.internal.util.zstd import tar_zst_compress UPLOAD_POLICY_FORCE = "force-upload" UPLOAD_POLICY_SKIP = "skip-upload" @@ -152,10 +152,11 @@ def _compress_recipe_files(self, layout, ref): files.pop(CONAN_MANIFEST) if files: - comp = self._compressed_file("conan_export", files, download_export_folder, ref) + comp = self._compressed_file(EXPORT_FILE_NAME, files, download_export_folder, ref) result[comp] = os.path.join(download_export_folder, comp) if src_files: - comp = self._compressed_file("conan_sources", src_files, download_export_folder, ref) + comp = self._compressed_file(EXPORT_SOURCES_FILE_NAME, src_files, + 
download_export_folder, ref) result[comp] = os.path.join(download_export_folder, comp) return result @@ -169,13 +170,10 @@ def _prepare_package(self, pref, prev_bundle): def _compressed_file(self, filename, files, download_folder, ref): output = ConanOutput(scope=str(ref)) - formats = {"zstd": ".tzst", - "xz": ".txz", - "gzip": ".tgz"} # Check if there is some existing compressed file alreday matches = [] - for extension in formats.values(): + for extension in COMPRESSIONS: file_name = filename + extension package_file = os.path.join(download_folder, file_name) if is_dirty(package_file): @@ -191,15 +189,15 @@ def _compressed_file(self, filename, files, download_folder, ref): # No compressed file exists, need to compress compressformat = self._global_conf.get("core.upload:compression_format", - default="gzip", choices=("zstd", "xz", "gzip")) + default="gz", choices=COMPRESSIONS) compresslevel = self._global_conf.get("core:compresslevel", check_type=int) - if compresslevel is None and compressformat == "gzip": + if compresslevel is None and compressformat == "gz": compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int) if compresslevel is not None: ConanOutput().warning("core.gzip:compresslevel is deprecated, " "use core.compresslevel instead", warn_tag="deprecated") - file_name = filename + formats[compressformat] + file_name = filename + compressformat package_file = os.path.join(download_folder, file_name) compressed_path = compress_files(files, file_name, download_folder, compresslevel=compresslevel, compressformat=compressformat, @@ -230,7 +228,7 @@ def _compress_package_files(self, layout, pref): files.pop(CONANINFO) files.pop(CONAN_MANIFEST) - compressed_file = self._compressed_file("conan_package", files, download_pkg_folder, pref) + compressed_file = self._compressed_file(PACKAGE_FILE_NAME, files, download_pkg_folder, pref) return {compressed_file: os.path.join(download_pkg_folder, compressed_file), CONANINFO: 
os.path.join(download_pkg_folder, CONANINFO), CONAN_MANIFEST: os.path.join(download_pkg_folder, CONAN_MANIFEST)} @@ -304,14 +302,15 @@ def compress_files(files, name, dest_dir, compressformat=None, compresslevel=Non if ref: ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") - if compressformat == "zstd": - tar_zst_compress(tgz_path, files, compresslevel=compresslevel) - return + if compressformat == "zst": + with tarfile.open(tgz_path, "w:zst", level=compresslevel) as tar: # noqa Py314 only + for filename, abs_path in sorted(files.items()): + tar.add(abs_path, filename, recursive=recursive) + return tgz_path if compressformat == "xz": - with tarfile.open(tgz_path, "w:xz") as tar: + with tarfile.open(tgz_path, "w:xz", preset=compresslevel, format=tarfile.PAX_FORMAT) as tar: for filename, abs_path in sorted(files.items()): - # recursive is False by default in case it is a symlink to a folder tar.add(abs_path, filename, recursive=recursive) return tgz_path diff --git a/conan/internal/model/manifest.py b/conan/internal/model/manifest.py index bd49135d3cf..3e598c8e3b2 100644 --- a/conan/internal/model/manifest.py +++ b/conan/internal/model/manifest.py @@ -1,7 +1,8 @@ import os from collections import defaultdict -from conan.internal.paths import CONAN_MANIFEST +from conan.internal.paths import CONAN_MANIFEST, COMPRESSIONS, PACKAGE_FILE_NAME, EXPORT_FILE_NAME, \ + EXPORT_SOURCES_FILE_NAME from conan.internal.util.dates import timestamp_now, timestamp_to_str from conan.internal.util.files import load, md5, md5sum, save, gather_files @@ -91,8 +92,8 @@ def create(cls, folder, exports_sources_folder=None): """ files, _ = gather_files(folder) # The folders symlinks are discarded for the manifest - for f in ("conan_package", "conan_export", "conan_sources"): - for e in (".gz", ".xz", ".tzst"): + for f in (PACKAGE_FILE_NAME, EXPORT_FILE_NAME, EXPORT_SOURCES_FILE_NAME): + for e in COMPRESSIONS: files.pop(f + e, None) files.pop(CONAN_MANIFEST, None) diff 
--git a/conan/internal/paths.py b/conan/internal/paths.py index 084c35dfd84..adc7d0dfa6f 100644 --- a/conan/internal/paths.py +++ b/conan/internal/paths.py @@ -86,7 +86,8 @@ def _user_home_from_conanrc_file(): CONANFILE_TXT = "conanfile.txt" CONAN_MANIFEST = "conanmanifest.txt" CONANINFO = "conaninfo.txt" -PACKAGE_TGZ_NAME = "conan_package.tgz" -EXPORT_TGZ_NAME = "conan_export.tgz" -EXPORT_SOURCES_TGZ_NAME = "conan_sources.tgz" +PACKAGE_FILE_NAME = "conan_package.t" +EXPORT_FILE_NAME = "conan_export.t" +EXPORT_SOURCES_FILE_NAME = "conan_sources.t" +COMPRESSIONS = "gz", "xz", "zst" DATA_YML = "conandata.yml" diff --git a/conan/internal/rest/remote_manager.py b/conan/internal/rest/remote_manager.py index 8c50148e523..0f66a1685c0 100644 --- a/conan/internal/rest/remote_manager.py +++ b/conan/internal/rest/remote_manager.py @@ -7,6 +7,7 @@ from requests.exceptions import ConnectionError from conan.api.model import LOCAL_RECIPES_INDEX +from conan.internal.paths import CONANINFO, CONAN_MANIFEST, PACKAGE_FILE_NAME, EXPORT_FILE_NAME from conan.internal.rest.rest_client_local_recipe_index import RestApiClientLocalRecipesIndex from conan.api.model import Remote from conan.api.output import ConanOutput @@ -18,7 +19,6 @@ from conan.api.model import PkgReference from conan.api.model import RecipeReference from conan.internal.util.files import rmdir, human_size -from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME from conan.internal.util.files import mkdir, tar_extract @@ -87,7 +87,8 @@ def get_recipe(self, ref, remote, metadata=None): self._cache.remove_recipe_layout(layout) raise export_folder = layout.export() - tgz_file = zipped_files.pop(EXPORT_TGZ_NAME, None) + export_file = next((f for f in zipped_files if EXPORT_FILE_NAME in f), None) + tgz_file = zipped_files.pop(export_file, None) if tgz_file: uncompress_file(tgz_file, export_folder, scope=str(ref)) @@ -133,7 +134,8 @@ def get_recipe_sources(self, ref, layout, remote): return 
self._signer.verify(ref, download_folder, files=zipped_files) - tgz_file = zipped_files[EXPORT_SOURCES_TGZ_NAME] + # Only 1 file is guaranteed + tgz_file = next(iter(zipped_files.values())) uncompress_file(tgz_file, export_sources_folder, scope=str(ref)) def get_package(self, pref, remote, metadata=None): @@ -179,11 +181,11 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): metadata, only_metadata=False) zipped_files = {k: v for k, v in zipped_files.items() if not k.startswith(METADATA)} # quick server package integrity check: - for f in ("conaninfo.txt", "conanmanifest.txt"): + for f in (CONANINFO, CONAN_MANIFEST): if f not in zipped_files: raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {f}") - package_file = next((f for f in zipped_files if "conan_package" in f), None) + package_file = next((f for f in zipped_files if PACKAGE_FILE_NAME in f), None) if not package_file: raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: " f"no conan_package found") diff --git a/conan/internal/rest/rest_client_v2.py b/conan/internal/rest/rest_client_v2.py index db3681ee9fa..9c8d98160fb 100644 --- a/conan/internal/rest/rest_client_v2.py +++ b/conan/internal/rest/rest_client_v2.py @@ -9,6 +9,8 @@ from uuid import getnode as get_mac from conan.api.output import ConanOutput +from conan.internal.paths import EXPORT_SOURCES_FILE_NAME, CONANINFO, CONAN_MANIFEST, \ + EXPORT_FILE_NAME from conan.internal.rest.caching_file_downloader import ConanInternalCacheDownloader from conan.internal.rest import response_to_str from conan.internal.rest.client_routes import ClientV2Router @@ -17,7 +19,6 @@ RecipeNotFoundException, PackageNotFoundException, EXCEPTION_CODE_MAPPING from conan.errors import ConanException from conan.api.model import PkgReference -from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME from conan.api.model import RecipeReference from conan.internal.util.dates import from_iso8601_to_timestamp @@ -238,8 +239,7 @@ 
def get_recipe(self, ref, dest_folder, metadata, only_metadata): result = {} if not only_metadata: - accepted_files = ["conanfile.py", "conan_export.tgz", "conanmanifest.txt", - "metadata/sign"] + accepted_files = ["conanfile.py", EXPORT_FILE_NAME, CONAN_MANIFEST, "metadata/sign"] files = [f for f in server_files if any(f.startswith(m) for m in accepted_files)] # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.recipe_file(ref, fn) for fn in files} @@ -259,9 +259,10 @@ def get_recipe_sources(self, ref, dest_folder): url = self.router.recipe_snapshot(ref) data = self._get_file_list_json(url) files = data["files"] - if EXPORT_SOURCES_TGZ_NAME not in files: + sources_file = next((f for f in files if EXPORT_SOURCES_FILE_NAME in f), None) + if sources_file is None: return None - files = [EXPORT_SOURCES_TGZ_NAME, ] + files = [sources_file, ] # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.recipe_file(ref, fn) for fn in files} @@ -280,7 +281,7 @@ def get_package(self, pref, dest_folder, metadata, only_metadata): f"more than one package file: {pkg_files}") # Download only known files, but not metadata (except sign) if not only_metadata: # Retrieve package first, then metadata - accepted_files = ["conaninfo.txt", "conanmanifest.txt", "metadata/sign"] + accepted_files = [CONANINFO, CONAN_MANIFEST, "metadata/sign"] if len(pkg_files) == 1: accepted_files.append(pkg_files[0]) files = [f for f in server_files if any(f.startswith(m) for m in accepted_files)] diff --git a/conan/internal/util/files.py b/conan/internal/util/files.py index f7b0d3e186f..0884ccf2fac 100644 --- a/conan/internal/util/files.py +++ b/conan/internal/util/files.py @@ -255,19 +255,7 @@ def mkdir(path): os.makedirs(path) -def tar_extract(fileobj, destination_dir, is_tar_zst=False): - if is_tar_zst: - raise_if_zstandard_not_present("decompression") - - dctx = zstandard.ZstdDecompressor() 
- with dctx.stream_reader(fileobj) as stream_reader: - # The choice of bufsize=32768 comes from profiling decompression at various - # values and finding that bufsize value consistently performs well. - with tarfile.open(fileobj=stream_reader, bufsize=32768, mode="r|") as the_tar: - the_tar.extractall(path=destination_dir, - filter=lambda tarinfo, _: tarinfo) - return - +def tar_extract(fileobj, destination_dir): the_tar = tarfile.open(fileobj=fileobj) # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't allow to # "could not change modification time", with time=0 diff --git a/conan/internal/util/zstd.py b/conan/internal/util/zstd.py deleted file mode 100644 index 3e6f2243eaa..00000000000 --- a/conan/internal/util/zstd.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import tarfile - -from conan.errors import ConanException - -try: - import zstandard - zstandard_exception = None -except ImportError as e: - zstandard_exception = e - - -def raise_if_zstandard_not_present(operation): - if zstandard_exception: - raise ConanException( - f"zstandard {operation} was requested, but the required package is not present. " - f"Please install it using 'pip install zstandard' and try again. " - f"Exception details: {zstandard_exception}") - - -def tar_zst_compress(tar_path, files, compresslevel=None): - raise_if_zstandard_not_present("compression") - - with open(tar_path, "wb") as tarfile_obj: - # Only provide level if it was overridden by config. - zstd_kwargs = {} - if compresslevel is not None: - zstd_kwargs["level"] = compresslevel - - dctx = zstandard.ZstdCompressor(write_checksum=True, threads=-1, **zstd_kwargs) - - # Create a zstd stream writer so tarfile writes uncompressed data to - # the zstd stream writer, which in turn writes compressed data to the - # output tar.zst file. 
- with dctx.stream_writer(tarfile_obj) as stream_writer: - # The choice of bufsize=32768 comes from profiling compression at various - # values and finding that bufsize value consistently performs well. - # The variance in compression times at bufsize<=64KB is small. It is only - # when bufsize>=128KB that compression times start increasing. - with tarfile.open(mode="w|", fileobj=stream_writer, bufsize=32768, - format=tarfile.PAX_FORMAT) as tar: - current_frame_bytes = 0 - for filename, abs_path in sorted(files.items()): - tar.add(abs_path, filename, recursive=False) - - # Flush the current frame if it has reached a large enough size. - # There is no required size, but 128MB is a good starting point - # because it allows for faster random access to the file. - current_frame_bytes += os.path.getsize(abs_path) - if current_frame_bytes >= 134217728: - stream_writer.flush(zstandard.FLUSH_FRAME) - current_frame_bytes = 0 diff --git a/conan/test/utils/test_files.py b/conan/test/utils/test_files.py index 7b3f011116d..7b099b064c0 100644 --- a/conan/test/utils/test_files.py +++ b/conan/test/utils/test_files.py @@ -12,7 +12,6 @@ from conan.tools.files.files import untargz from conan.internal.subsystems import get_cased_path from conan.errors import ConanException -from conan.internal.paths import PACKAGE_TGZ_NAME def wait_until_removed(folder): @@ -59,6 +58,7 @@ def uncompress_packaged_files(paths, pref): pref.revision = prev package_path = paths.package(pref) + PACKAGE_TGZ_NAME = "conan_package.tgz" if not(os.path.exists(os.path.join(package_path, PACKAGE_TGZ_NAME))): raise ConanException("%s not found in %s" % (PACKAGE_TGZ_NAME, package_path)) tmp = temp_folder() diff --git a/test/integration/command/upload/upload_test.py b/test/integration/command/upload/upload_test.py index d9eb6888caf..30de88a034d 100644 --- a/test/integration/command/upload/upload_test.py +++ b/test/integration/command/upload/upload_test.py @@ -12,7 +12,6 @@ from conan.errors import ConanException from 
conan.api.model import PkgReference from conan.internal.api.uploader import gzopen_without_timestamps -from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, PACKAGE_TGZ_NAME from conan.test.utils.tools import NO_SETTINGS_PACKAGE_ID, TestClient, TestServer, \ GenConanfile, TestRequester, TestingResponse from conan.internal.util.files import is_dirty, save, set_dirty, sha1sum @@ -129,7 +128,7 @@ def gzopen_patched(name, mode="r", fileobj=None, **kwargs): # noqa export_download_folder = layout.download_export() - tgz = os.path.join(export_download_folder, EXPORT_SOURCES_TGZ_NAME) + tgz = os.path.join(export_download_folder, "conan_sources.tgz") assert os.path.exists(tgz) assert is_dirty(tgz) @@ -147,7 +146,7 @@ def test_broken_package_tgz(self): pref = client.created_layout().reference def gzopen_patched(name, fileobj, compresslevel=None): # noqa - if name == PACKAGE_TGZ_NAME: + if name == "conan_package.tgz": raise ConanException("Error gzopen %s" % name) return gzopen_without_timestamps(name, fileobj) with patch('conan.internal.api.uploader.gzopen_without_timestamps', new=gzopen_patched): @@ -155,7 +154,7 @@ def gzopen_patched(name, fileobj, compresslevel=None): # noqa assert "Error gzopen conan_package.tgz" in client.out download_folder = client.get_latest_pkg_layout(pref).download_package() - tgz = os.path.join(download_folder, PACKAGE_TGZ_NAME) + tgz = os.path.join(download_folder, "conan_package.tgz") assert os.path.exists(tgz) assert is_dirty(tgz) diff --git a/test/integration/symlinks/symlinks_test.py b/test/integration/symlinks/symlinks_test.py index 7bd09064c86..009cb783f65 100644 --- a/test/integration/symlinks/symlinks_test.py +++ b/test/integration/symlinks/symlinks_test.py @@ -6,7 +6,6 @@ from conan.api.model import PkgReference from conan.api.model import RecipeReference -from conan.internal.paths import PACKAGE_TGZ_NAME from conan.test.assets.genconanfile import GenConanfile from conan.test.utils.test_files import temp_folder from 
conan.test.utils.tools import TestClient, TestServer @@ -306,7 +305,7 @@ def package(self): client.run("upload * -r=default -c") # We can uncompress it without warns - tgz = os.path.join(p_folder, PACKAGE_TGZ_NAME) + tgz = os.path.join(p_folder, "conan_package.tgz") client.run_command('gzip -d "{}"'.format(tgz)) client.run_command('tar tvf "{}"'.format(os.path.join(p_folder, "conan_package.tar"))) lines = str(client.out).splitlines() diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py index 61cb302b383..3ae0e0b26e9 100644 --- a/test/integration/test_compressions.py +++ b/test/integration/test_compressions.py @@ -1,17 +1,74 @@ +import json +import os +import sys +import textwrap + +import pytest + +from conan.api.model import RecipeReference, PkgReference +from conan.internal.util import load from conan.test.utils.tools import TestClient -def test_xz(): +@pytest.mark.parametrize("compress", ["gz", "xz", "zst"]) +def test_xz(compress): + if compress == "zst" and sys.version_info.minor < 14: + pytest.skip("Skipping zst compression tests") + c = TestClient(default_server_user=True) - c.save_home({"global.conf": "core.upload:compression_format=xz"}) - c.run("new header_lib") + c.save_home({"global.conf": f"core.upload:compression_format={compress}"}) + conanfile = textwrap.dedent(""" + from conan import ConanFile + from conan.tools.files import copy + + class Pkg(ConanFile): + name = "pkg" + version = "0.1" + exports_sources = "*.h" + exports = "*.yml" + def package(self): + copy(self, "*.h", self.source_folder, self.package_folder) + """) + + c.save({"conanfile.py": conanfile, + "header.h": "myheader", + "myfile.yml": "myyml"}) c.run("create -tf=") - c.run("upload * -r=default -c") - print(c.out) + c.run("upload * -r=default -c --format=json") + + # Verify the uploaded files are all txz + upload_json = json.loads(c.stdout) + rrev = upload_json["default"]["pkg/0.1"]["revisions"]["4e81a0b14da7ae918cf3dba3a07578d6"] + rfiles = 
rrev["files"] + assert f"conan_export.t{compress}" in rfiles + assert f"conan_sources.t{compress}" in rfiles + prevs = rrev["packages"]["da39a3ee5e6b4b0d3255bfef95601890afd80709"]["revisions"] + prev = prevs["13eb72928af98144fa7bf104b69663bc"] + pfiles = prev["files"] + assert f"conan_package.t{compress}" in pfiles + + # decompress should work anyway + c.save_home({"global.conf": ""}) c.run("remove * -c") - c.run("install --requires=mypkg/0.1") - print(c.out) - c.run("cache path mypkg/0.1") - print(c.out) - c.run("cache path mypkg/0.1:da39a3ee5e6b4b0d3255bfef95601890afd80709") - print(c.out) + c.run("install --requires=pkg/0.1") + + # checking the recipe + ref = RecipeReference.loads("pkg/0.1") + rlayout = c.get_latest_ref_layout(ref) + downloaded_files = os.listdir(rlayout.download_export()) + assert f"conan_export.t{compress}" in downloaded_files + assert f"conan_sources.t{compress}" not in downloaded_files + assert "myyml" == load(os.path.join(rlayout.export(), "myfile.yml")) + + # checking the package + pref = PkgReference(rlayout.reference, "da39a3ee5e6b4b0d3255bfef95601890afd80709") + playout = c.get_latest_pkg_layout(pref) + downloaded_files = os.listdir(playout.download_package()) + assert f"conan_package.t{compress}" in downloaded_files + assert "myheader" == load(os.path.join(playout.package(), "header.h")) + + # Force the build from source + c.run("install --requires=pkg/0.1 --build=*") + downloaded_files = os.listdir(rlayout.download_export()) + assert f"conan_export.t{compress}" in downloaded_files + assert f"conan_sources.t{compress}" in downloaded_files diff --git a/test/integration/tgz_macos_dot_files_test.py b/test/integration/tgz_macos_dot_files_test.py index 8621be5890e..44340726c8f 100644 --- a/test/integration/tgz_macos_dot_files_test.py +++ b/test/integration/tgz_macos_dot_files_test.py @@ -9,7 +9,6 @@ from conan.internal.rest.remote_manager import uncompress_file from conan.api.model import RecipeReference -from conan.internal.paths import 
EXPORT_SOURCES_TGZ_NAME from conan.test.utils.tools import TestClient, NO_SETTINGS_PACKAGE_ID @@ -94,7 +93,7 @@ def _add_macos_metadata_to_file(filepath): # 3) In the upload process, the metadata is lost again export_download_folder = t.get_latest_ref_layout(pref.ref).download_export() - tgz = os.path.join(export_download_folder, EXPORT_SOURCES_TGZ_NAME) + tgz = os.path.join(export_download_folder, "conan_sources.tgz") assert not os.path.exists(tgz) t.run("upload lib/version@user/channel -r default --only-recipe") self._test_for_metadata_in_zip_file(tgz, 'file.txt', dot_file_expected=False) diff --git a/test/unittests/client/remote_manager_test.py b/test/unittests/client/remote_manager_test.py index 88cc3f9df1b..27ededef4e4 100644 --- a/test/unittests/client/remote_manager_test.py +++ b/test/unittests/client/remote_manager_test.py @@ -4,7 +4,6 @@ import pytest from conan.internal.api.uploader import compress_files -from conan.internal.paths import PACKAGE_TGZ_NAME from conan.internal.rest.remote_manager import uncompress_file from conan.test.utils.test_files import temp_folder from conan.internal.util.files import save @@ -22,9 +21,9 @@ def test_compress_files_tgz(self): "Two_file.txt": os.path.join(folder, "Two_file.txt"), } - path = compress_files(files, PACKAGE_TGZ_NAME, dest_dir=folder) + path = compress_files(files, "conan_package.tgz", dest_dir=folder) assert os.path.exists(path) - expected_path = os.path.join(folder, PACKAGE_TGZ_NAME) + expected_path = os.path.join(folder, "conan_package.tgz") assert path == expected_path def test_compress_and_uncompress_xz_files(self): From ca1fcb13f974f21934a97fc135b15bc74d765db5 Mon Sep 17 00:00:00 2001 From: memsharded Date: Wed, 3 Dec 2025 11:17:33 +0100 Subject: [PATCH 16/23] review --- conan/internal/api/uploader.py | 34 ++++++++++++++++----------- conan/internal/model/conf.py | 2 +- conan/internal/rest/remote_manager.py | 6 ++++- test/integration/test_compressions.py | 21 +++++++++++++++++ 4 files changed, 47 
insertions(+), 16 deletions(-) diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 7cf63b5f0d0..8283e441605 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -2,6 +2,7 @@ import gzip import os import shutil +import sys import tarfile import time @@ -85,6 +86,21 @@ def __init__(self, app: ConanApp, global_conf): self._app = app self._global_conf = global_conf + # No compressed file exists, need to compress + compressformat = self._global_conf.get("core.upload:compression_format", + default="gz", choices=COMPRESSIONS) + if compressformat == "zst" and sys.version_info.minor < 14: + raise ConanException("The 'core.upload:compression_format=zst' is only for Python>=3.14") + compresslevel = self._global_conf.get("core:compresslevel", check_type=int) + if compresslevel is None and compressformat == "gz": + compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int) + if compresslevel is not None: + ConanOutput().warning("core.gzip:compresslevel is deprecated, " + "use core.compresslevel instead", warn_tag="deprecated") + + self._compressformat = compressformat + self._compresslevel = compresslevel + def prepare(self, pkg_list, enabled_remotes): local_url = self._global_conf.get("core.scm:local_url", choices=["allow", "block"]) for ref, packages in pkg_list.items(): @@ -171,7 +187,7 @@ def _prepare_package(self, pref, prev_bundle): def _compressed_file(self, filename, files, download_folder, ref): output = ConanOutput(scope=str(ref)) - # Check if there is some existing compressed file alreday + # Check if there is some existing compressed file already matches = [] for extension in COMPRESSIONS: file_name = filename + extension @@ -187,21 +203,11 @@ def _compressed_file(self, filename, files, download_folder, ref): if len(matches) == 1: return matches[0] - # No compressed file exists, need to compress - compressformat = self._global_conf.get("core.upload:compression_format", - 
default="gz", choices=COMPRESSIONS) - compresslevel = self._global_conf.get("core:compresslevel", check_type=int) - if compresslevel is None and compressformat == "gz": - compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int) - if compresslevel is not None: - ConanOutput().warning("core.gzip:compresslevel is deprecated, " - "use core.compresslevel instead", warn_tag="deprecated") - - file_name = filename + compressformat + file_name = filename + self._compressformat package_file = os.path.join(download_folder, file_name) compressed_path = compress_files(files, file_name, download_folder, - compresslevel=compresslevel, compressformat=compressformat, - ref=ref) + compresslevel=self._compresslevel, + compressformat=self._compressformat, ref=ref) assert compressed_path == package_file assert os.path.exists(package_file) return file_name diff --git a/conan/internal/model/conf.py b/conan/internal/model/conf.py index 1e51a486551..ab59e754d23 100644 --- a/conan/internal/model/conf.py +++ b/conan/internal/model/conf.py @@ -62,7 +62,7 @@ "core.net.http:clean_system_proxy": "If defined, the proxies system env-vars will be discarded", # Compression for `conan upload` "core.upload:compression_format": "The compression format used when uploading Conan packages. 
" - "Possible values: 'zstd', 'xz', 'gzip' (default=gzip)", + "Possible values: 'zst', 'xz', 'gz' (default=gz)", "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9)", "core:compresslevel": "The compression level for Conan artifacts (default zstd=3, gz=9)", # Excluded from revision_mode = "scm" dirty and Git().is_dirty() checks diff --git a/conan/internal/rest/remote_manager.py b/conan/internal/rest/remote_manager.py index 0f66a1685c0..86a02adca9b 100644 --- a/conan/internal/rest/remote_manager.py +++ b/conan/internal/rest/remote_manager.py @@ -1,5 +1,6 @@ import os import shutil +import sys from collections import namedtuple from typing import List @@ -191,7 +192,7 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): f"no conan_package found") self._signer.verify(pref, download_pkg_folder, zipped_files) - tgz_file = zipped_files.pop(package_file, None) + tgz_file = zipped_files.pop(package_file) package_folder = layout.package() uncompress_file(tgz_file, package_folder, scope=str(pref.ref)) mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing @@ -345,6 +346,9 @@ def _call_remote(self, remote, method, *args, **kwargs): def uncompress_file(src_path, dest_folder, scope=None): + if sys.version_info.major < 14 and src_path.endswith(".tzst"): + raise ConanException(f"File {os.path.basename(src_path)} compressed with 'zst', " + f"unsupported for Python<3.14 ") try: filesize = os.path.getsize(src_path) big_file = filesize > 10000000 # 10 MB diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py index 3ae0e0b26e9..c56d7fa94e3 100644 --- a/test/integration/test_compressions.py +++ b/test/integration/test_compressions.py @@ -7,6 +7,7 @@ from conan.api.model import RecipeReference, PkgReference from conan.internal.util import load +from conan.test.assets.genconanfile import GenConanfile from conan.test.utils.tools import TestClient @@ -72,3 +73,23 @@ def 
package(self): downloaded_files = os.listdir(rlayout.download_export()) assert f"conan_export.t{compress}" in downloaded_files assert f"conan_sources.t{compress}" in downloaded_files + + +@pytest.mark.skipif(sys.version_info.minor >= 14, reason="validate zstd error in python<314") +def test_unsupported_zstd(): + c = TestClient(default_server_user=True) + c.save({"conanfile.py": GenConanfile("pkg", "0.1").with_package_file("myfile.h", "contents")}) + c.run("create") + playout = c.created_layout() + c.run("upload * -r=default -c -cc core.upload:compression_format=zst", assert_error=True) + assert "ERROR: The 'core.upload:compression_format=zst' is only for Python>=3.14" in c.out + + # Lets cheat, creating a fake zstd to test download + c.run("upload * -r=default -c --dry-run") + os.rename(os.path.join(playout.download_package(), "conan_package.tgz"), + os.path.join(playout.download_package(), "conan_package.tzst")) + c.run("upload * -r=default -c") + c.run("remove * -c") + c.run("install --requires=pkg/0.1", assert_error=True) + assert ("ERROR: File conan_package.tzst compressed with 'zst', unsupported " + "for Python<3.14") in c.out From 06e15c3ed70b7752cb5f825f9ec12be56f0aef74 Mon Sep 17 00:00:00 2001 From: memsharded Date: Fri, 5 Dec 2025 17:26:24 +0100 Subject: [PATCH 17/23] compression for cache save/restore too --- conan/cli/commands/cache.py | 3 ++ conan/internal/api/uploader.py | 15 +++--- conan/internal/rest/remote_manager.py | 8 ++- conan/internal/rest/rest_client_v2.py | 35 ++++++++----- .../command/cache/test_cache_save_restore.py | 30 ++++++++++-- test/integration/test_compressions.py | 49 +++++++++++++++++++ 6 files changed, 114 insertions(+), 26 deletions(-) diff --git a/conan/cli/commands/cache.py b/conan/cli/commands/cache.py index 117eea9a3d9..cd3fa5eca2a 100644 --- a/conan/cli/commands/cache.py +++ b/conan/cli/commands/cache.py @@ -185,6 +185,9 @@ def cache_save(conan_api: ConanAPI, parser, subparser, *args): else: ref_pattern = 
ListPattern(args.pattern) package_list = conan_api.list.select(ref_pattern) + if args.file and not args.file.endswith(".tgz"): + ConanOutput().warning("Compression using other than .tgz is experimental. Use .tzx or " + ".tzst (Python>=3.14 only) extensions for other formats") tgz_path = make_abs_path(args.file or "conan_cache_save.tgz") conan_api.cache.save(package_list, tgz_path, args.no_source) return {"results": {"Local Cache": package_list.serialize()}} diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 8283e441605..902bb4b09a8 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -89,6 +89,11 @@ def __init__(self, app: ConanApp, global_conf): # No compressed file exists, need to compress compressformat = self._global_conf.get("core.upload:compression_format", default="gz", choices=COMPRESSIONS) + if compressformat in ("xz", "zst"): + ConanOutput().warning(f"The {compressformat} compression is highly experimental, " + f"use it at your own risk and expect issues. 
Feedback welcome, " + f"please report it as Github tickets", + warn_tag="risk") if compressformat == "zst" and sys.version_info.minor < 14: raise ConanException("The 'core.upload:compression_format=zst' is only for Python>=3.14") compresslevel = self._global_conf.get("core:compresslevel", check_type=int) @@ -206,8 +211,7 @@ def _compressed_file(self, filename, files, download_folder, ref): file_name = filename + self._compressformat package_file = os.path.join(download_folder, file_name) compressed_path = compress_files(files, file_name, download_folder, - compresslevel=self._compresslevel, - compressformat=self._compressformat, ref=ref) + compresslevel=self._compresslevel, ref=ref) assert compressed_path == package_file assert os.path.exists(package_file) return file_name @@ -300,21 +304,20 @@ def gzopen_without_timestamps(name, fileobj, compresslevel=None): return t -def compress_files(files, name, dest_dir, compressformat=None, compresslevel=None, ref=None, - recursive=False): +def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursive=False): t1 = time.time() # FIXME, better write to disk sequentially and not keep tgz contents in memory tgz_path = os.path.join(dest_dir, name) if ref: ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") - if compressformat == "zst": + if name.endswith("zst"): with tarfile.open(tgz_path, "w:zst", level=compresslevel) as tar: # noqa Py314 only for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, recursive=recursive) return tgz_path - if compressformat == "xz": + if name.endswith("xz"): with tarfile.open(tgz_path, "w:xz", preset=compresslevel, format=tarfile.PAX_FORMAT) as tar: for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, recursive=recursive) diff --git a/conan/internal/rest/remote_manager.py b/conan/internal/rest/remote_manager.py index 86a02adca9b..e130a701a3e 100644 --- a/conan/internal/rest/remote_manager.py +++ 
b/conan/internal/rest/remote_manager.py @@ -186,10 +186,8 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata): if f not in zipped_files: raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: no {f}") - package_file = next((f for f in zipped_files if PACKAGE_FILE_NAME in f), None) - if not package_file: - raise ConanException(f"Corrupted {pref} in '{remote.name}' remote: " - f"no conan_package found") + # This is guaranteed to exists, otherwise RestClient would have raised already + package_file = next(f for f in zipped_files if PACKAGE_FILE_NAME in f) self._signer.verify(pref, download_pkg_folder, zipped_files) tgz_file = zipped_files.pop(package_file) @@ -346,7 +344,7 @@ def _call_remote(self, remote, method, *args, **kwargs): def uncompress_file(src_path, dest_folder, scope=None): - if sys.version_info.major < 14 and src_path.endswith(".tzst"): + if sys.version_info.minor < 14 and src_path.endswith(".tzst"): raise ConanException(f"File {os.path.basename(src_path)} compressed with 'zst', " f"unsupported for Python<3.14 ") try: diff --git a/conan/internal/rest/rest_client_v2.py b/conan/internal/rest/rest_client_v2.py index 9c8d98160fb..609d185f39f 100644 --- a/conan/internal/rest/rest_client_v2.py +++ b/conan/internal/rest/rest_client_v2.py @@ -10,7 +10,7 @@ from conan.api.output import ConanOutput from conan.internal.paths import EXPORT_SOURCES_FILE_NAME, CONANINFO, CONAN_MANIFEST, \ - EXPORT_FILE_NAME + EXPORT_FILE_NAME, PACKAGE_FILE_NAME from conan.internal.rest.caching_file_downloader import ConanInternalCacheDownloader from conan.internal.rest import response_to_str from conan.internal.rest.client_routes import ClientV2Router @@ -239,8 +239,11 @@ def get_recipe(self, ref, dest_folder, metadata, only_metadata): result = {} if not only_metadata: - accepted_files = ["conanfile.py", EXPORT_FILE_NAME, CONAN_MANIFEST, "metadata/sign"] + accepted_files = ["conanfile.py", CONAN_MANIFEST, "metadata/sign"] files = [f for f in 
server_files if any(f.startswith(m) for m in accepted_files)] + export_file = self._find_compressed_file(ref, server_files, EXPORT_FILE_NAME) + if export_file is not None: + files.append(export_file) # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.recipe_file(ref, fn) for fn in files} self._download_and_save_files(urls, dest_folder, files, parallel=True) @@ -259,10 +262,10 @@ def get_recipe_sources(self, ref, dest_folder): url = self.router.recipe_snapshot(ref) data = self._get_file_list_json(url) files = data["files"] - sources_file = next((f for f in files if EXPORT_SOURCES_FILE_NAME in f), None) - if sources_file is None: + src_file = self._find_compressed_file(ref, files, EXPORT_SOURCES_FILE_NAME) + if src_file is None: return None - files = [sources_file, ] + files = [src_file, ] # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.recipe_file(ref, fn) for fn in files} @@ -270,20 +273,28 @@ def get_recipe_sources(self, ref, dest_folder): ret = {fn: os.path.join(dest_folder, fn) for fn in files} return ret + @staticmethod + def _find_compressed_file(ref, server_files, artifact, exists=False): + pkg_files = [f for f in server_files if f.startswith(artifact)] + if len(pkg_files) > 1: + raise ConanException(f"{ref} is corrupted in the server, it contains " + f"more than one compressed file: {pkg_files}") + if not pkg_files: + if not exists: + return None + raise ConanException(f"Recipe {ref} is corrupted in the server, it doesn't contain " + f"a {artifact} file") + return pkg_files[0] + def get_package(self, pref, dest_folder, metadata, only_metadata): url = self.router.package_snapshot(pref) data = self._get_file_list_json(url) server_files = data["files"] result = {} - pkg_files = [f for f in server_files if f.startswith("conan_package.")] - if len(pkg_files) > 1: - raise ConanException(f"Package {pref} is corrupted in the server, it 
contains " - f"more than one package file: {pkg_files}") # Download only known files, but not metadata (except sign) if not only_metadata: # Retrieve package first, then metadata - accepted_files = [CONANINFO, CONAN_MANIFEST, "metadata/sign"] - if len(pkg_files) == 1: - accepted_files.append(pkg_files[0]) + pkg_file = self._find_compressed_file(pref, server_files, PACKAGE_FILE_NAME, exists=True) + accepted_files = [CONANINFO, pkg_file, CONAN_MANIFEST, "metadata/sign"] files = [f for f in server_files if any(f.startswith(m) for m in accepted_files)] # If we didn't indicated reference, server got the latest, use absolute now, it's safer urls = {fn: self.router.package_file(pref, fn) for fn in files} diff --git a/test/integration/command/cache/test_cache_save_restore.py b/test/integration/command/cache/test_cache_save_restore.py index a6bc91b495d..005c608e49f 100644 --- a/test/integration/command/cache/test_cache_save_restore.py +++ b/test/integration/command/cache/test_cache_save_restore.py @@ -2,6 +2,7 @@ import os import platform import shutil +import sys import tarfile import time @@ -14,7 +15,6 @@ from conan.internal.util.files import save, load - def test_cache_save_restore(): c = TestClient() c.save({"conanfile.py": GenConanfile().with_settings("os")}) @@ -199,7 +199,8 @@ def test_cache_save_restore_metadata(): # FIXME: check the timestamps of the conan cache restore -@pytest.mark.skipif(platform.system() == "Windows", reason="Fails in windows in ci because of the low precission of the clock") +@pytest.mark.skipif(platform.system() == "Windows", + reason="Fails in windows in ci because of the low precission of the clock") def test_cache_save_restore_multiple_revisions(): c = TestClient() c.save({"conanfile.py": GenConanfile("pkg", "0.1")}) @@ -214,7 +215,6 @@ def test_cache_save_restore_multiple_revisions(): c.run("create .") rrev3 = c.exported_recipe_revision() - def check_ordered_revisions(client): client.run("list *#* --format=json") revisions = 
json.loads(client.stdout)["Local Cache"]["pkg/0.1"]["revisions"] @@ -293,3 +293,27 @@ def test_cache_save_restore_custom_storage_path(src_store, dst_store): c2.run("cache restore conan_cache_save.tgz") c2.run("list *:*") assert "pkg/1.0" in c2.out + + +@pytest.mark.parametrize("compress", ["gz", "xz", "zst"]) +def test_cache_save_restore_compressions(compress): + """ we accept different compressions formats""" + if compress == "zst" and sys.version_info.minor < 14: + pytest.skip("Skipping zst compression tests") + + conan_file = GenConanfile() \ + .with_settings("os") \ + .with_package_file("bin/file.txt", "content!!") + + client = TestClient() + client.save({"conanfile.py": conan_file}) + client.run("create . --name=pkg --version=1.0 -s os=Linux") + client.run(f"cache save pkg/*:* --file=mysave.t{compress}") + cache_path = os.path.join(client.current_folder, f"mysave.t{compress}") + assert os.path.exists(cache_path) + + c2 = TestClient() + shutil.copy2(cache_path, c2.current_folder) + c2.run(f"cache restore mysave.t{compress}") + c2.run("list *:*#*") + assert "pkg/1.0" in c2.out diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py index c56d7fa94e3..4813c074efd 100644 --- a/test/integration/test_compressions.py +++ b/test/integration/test_compressions.py @@ -93,3 +93,52 @@ def test_unsupported_zstd(): c.run("install --requires=pkg/0.1", assert_error=True) assert ("ERROR: File conan_package.tzst compressed with 'zst', unsupported " "for Python<3.14") in c.out + + +class TestDuplicatedInServerErrors: + + def test_duplicated_export(self): + c = TestClient(default_server_user=True) + c.save({"conanfile.py": GenConanfile("pkg", "0.1"), + "conandata.yml": ""}) + c.run("export") + c.run("upload * -r=default -c") + c.run("remove * -c") + c.run("export") + c.run("upload * -r=default -c -cc core.upload:compression_format=xz --force") + assert ("WARN: risk: The xz compression is highly experimental, use it at your " + "own risk and 
expect issues" in c.out) + + c.run("remove * -c") + c.run("install --requires=pkg/0.1", assert_error=True) + assert ("it contains more than one compressed file: " + "['conan_export.tgz', 'conan_export.txz']") in c.out + + def test_duplicated_source(self): + c = TestClient(default_server_user=True) + c.save({"conanfile.py": GenConanfile("pkg", "0.1").with_exports_sources("*.h"), + "myheader.h": "content"}) + c.run("export") + c.run("upload * -r=default -c") + c.run("remove * -c") + c.run("export") + c.run("upload * -r=default -c -cc core.upload:compression_format=xz --force") + + c.run("remove * -c") + c.run("install --requires=pkg/0.1 --build=missing", assert_error=True) + assert ("it contains more than one compressed file: " + "['conan_sources.tgz', 'conan_sources.txz']") in c.out + + def test_duplicated_package(self): + c = TestClient(default_server_user=True) + c.save({"conanfile.py": GenConanfile("pkg", "0.1")}) + c.run("create") + c.run("upload * -r=default -c") + c.run("remove * -c") + c.run("create") + c.run("upload * -r=default -c -cc core.upload:compression_format=xz --force") + + c.run("remove * -c") + c.run("install --requires=pkg/0.1", assert_error=True) + assert ("it contains more than one compressed file: " + "['conan_package.tgz', 'conan_package.txz']") in c.out From 60ef29c5940c20a46ad9fb38387192bb7b265d0b Mon Sep 17 00:00:00 2001 From: memsharded Date: Sat, 6 Dec 2025 00:04:51 +0100 Subject: [PATCH 18/23] fix unit test --- test/unittests/client/remote_manager_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unittests/client/remote_manager_test.py b/test/unittests/client/remote_manager_test.py index 27ededef4e4..60f01384ea9 100644 --- a/test/unittests/client/remote_manager_test.py +++ b/test/unittests/client/remote_manager_test.py @@ -36,7 +36,7 @@ def test_compress_and_uncompress_xz_files(self): "Two_file.txt": os.path.join(folder, "Two_file.txt"), } - path = compress_files(files, "conan_package.txz", 
dest_dir=folder, compressformat="xz") + path = compress_files(files, "conan_package.txz", dest_dir=folder) assert os.path.exists(path) expected_path = os.path.join(folder, "conan_package.txz") assert path == expected_path @@ -64,7 +64,7 @@ def test_compress_and_uncompress_zst_files(self): "Two_file.txt": os.path.join(folder, "Two_file.txt"), } - path = compress_files(files, "conan_package.tzst", dest_dir=folder, compressformat="zstd") + path = compress_files(files, "conan_package.tzst", dest_dir=folder) assert os.path.exists(path) expected_path = os.path.join(folder, "conan_package.tzst") assert path == expected_path From f79080e52fd826209c478c4187a357e9f11aa382 Mon Sep 17 00:00:00 2001 From: memsharded Date: Tue, 9 Dec 2025 13:52:57 +0100 Subject: [PATCH 19/23] fix tests --- conan/internal/rest/rest_client_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conan/internal/rest/rest_client_v2.py b/conan/internal/rest/rest_client_v2.py index 609d185f39f..762cbd5fd57 100644 --- a/conan/internal/rest/rest_client_v2.py +++ b/conan/internal/rest/rest_client_v2.py @@ -278,7 +278,7 @@ def _find_compressed_file(ref, server_files, artifact, exists=False): pkg_files = [f for f in server_files if f.startswith(artifact)] if len(pkg_files) > 1: raise ConanException(f"{ref} is corrupted in the server, it contains " - f"more than one compressed file: {pkg_files}") + f"more than one compressed file: {sorted(pkg_files)}") if not pkg_files: if not exists: return None From 85f8452fcb1c6f01eb588e03b1d26bec03e3c29f Mon Sep 17 00:00:00 2001 From: memsharded Date: Tue, 9 Dec 2025 14:37:51 +0100 Subject: [PATCH 20/23] fix tests --- test/integration/remote/rest_api_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/integration/remote/rest_api_test.py b/test/integration/remote/rest_api_test.py index c9f8beb75c9..4d9f7d198dc 100644 --- a/test/integration/remote/rest_api_test.py +++ b/test/integration/remote/rest_api_test.py @@ -169,7 +169,8 
@@ def _upload_package(self, package_reference, base_files=None): files = {"conanfile.py": GenConanfile("3").with_requires("1", "12").with_exports("*"), "hello.cpp": "hello", - "conanmanifest.txt": ""} + "conanmanifest.txt": "", + "conan_package.tgz": ""} if base_files: files.update(base_files) From b2310b1a8fd889eda30530e84c2ff8cd0feb86ed Mon Sep 17 00:00:00 2001 From: memsharded Date: Fri, 9 Jan 2026 14:00:33 +0100 Subject: [PATCH 21/23] fix save/restore with Path --- conan/api/subapi/cache.py | 8 ++++---- test/integration/test_compressions.py | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conan/api/subapi/cache.py b/conan/api/subapi/cache.py index 7ebfdb289a2..ba8323dffff 100644 --- a/conan/api/subapi/cache.py +++ b/conan/api/subapi/cache.py @@ -149,9 +149,10 @@ def save(self, package_list: PackagesList, tgz_path, no_source=False) -> None: cache_folder = cache.store # Note, this is not the home, but the actual package cache out = ConanOutput() mkdir(os.path.dirname(tgz_path)) - compressformat = next((e for e in COMPRESSIONS if tgz_path.endswith(e)), None) + tgz_name = os.path.basename(tgz_path) + compressformat = next((e for e in COMPRESSIONS if tgz_name.endswith(e)), None) if not compressformat: - raise ConanException(f"Unsupported compression format for {tgz_path}") + raise ConanException(f"Unsupported compression format for {tgz_name}") compresslevel = get_compress_level(compressformat, global_conf) tar_files: dict[str, str] = {} # {path_in_tar: abs_path} @@ -195,8 +196,7 @@ def save(self, package_list: PackagesList, tgz_path, no_source=False) -> None: pkglist_path = os.path.join(tempfile.gettempdir(), "pkglist.json") save(pkglist_path, serialized) tar_files["pkglist.json"] = pkglist_path - compress_files(tar_files, os.path.basename(tgz_path), os.path.dirname(tgz_path), - compresslevel, recursive=True) + compress_files(tar_files, tgz_name, os.path.dirname(tgz_path), compresslevel, recursive=True) remove(pkglist_path) 
ConanOutput().success(f"Created cache save file: {tgz_path}") diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py index 4813c074efd..07c06a9f902 100644 --- a/test/integration/test_compressions.py +++ b/test/integration/test_compressions.py @@ -106,8 +106,7 @@ def test_duplicated_export(self): c.run("remove * -c") c.run("export") c.run("upload * -r=default -c -cc core.upload:compression_format=xz --force") - assert ("WARN: risk: The xz compression is highly experimental, use it at your " - "own risk and expect issues" in c.out) + assert "WARN: risk: The 'xz' compression is experimental" in c.out c.run("remove * -c") c.run("install --requires=pkg/0.1", assert_error=True) From ef1c8f92016791e596bf12d6f64d8ab360ef6cf5 Mon Sep 17 00:00:00 2001 From: memsharded Date: Tue, 13 Jan 2026 17:05:36 +0100 Subject: [PATCH 22/23] last review --- conan/internal/api/uploader.py | 21 ++++++++++++--------- conan/internal/model/conf.py | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 491d40acbb8..9fa56f9c3f8 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -86,22 +86,20 @@ def get_compress_level(compressformat, global_conf): msg = ("The 'xz' compression is experimental. " "Consumers using older Conan versions will not be able to install these packages. " "Feedback is welcome, please report any issues as GitHub tickets.") - ConanOutput().warning(msg, warn_tag="risk") + ConanOutput().warning(msg, warn_tag="experimental") elif compressformat == "zst": msg = ("The 'zst' compression is experimental. " "Consumers installing packages created with this format must use Python >= 3.14. " "Consumers using older Conan or Python versions will not be able to install these " "packages. 
Feedback is welcome, please report any issues as GitHub tickets.") - ConanOutput().warning(msg, warn_tag="risk") + ConanOutput().warning(msg, warn_tag="experimental") if compressformat == "zst" and sys.version_info.minor < 14: raise ConanException("The 'core.upload:compression_format=zst' is only for Python>=3.14") compresslevel = global_conf.get("core:compresslevel", check_type=int) if compresslevel is None and compressformat == "gz": compresslevel = global_conf.get("core.gzip:compresslevel", check_type=int) - if compresslevel is not None: - ConanOutput().warning("core.gzip:compresslevel is deprecated, " - "use core.compresslevel instead", warn_tag="deprecated") + # do not deprecate yet core.gzip:compresslevel, wait a bit to stabilize core:compresslevel return compresslevel @@ -215,7 +213,11 @@ def _compressed_file(self, filename, files, download_folder, ref): if len(matches) > 1: raise ConanException(f"{ref}: Multiple package files found for {filename}: {matches}") if len(matches) == 1: - return matches[0] + existing = matches[0] + if not existing.endswith(self._compressformat): + output.info(f"Existing {existing} compressed file, " + f"keeping it, not using '{self._compressformat}' format") + return existing file_name = filename + self._compressformat package_file = os.path.join(download_folder, file_name) @@ -315,7 +317,6 @@ def gzopen_without_timestamps(name, fileobj, compresslevel=None): def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursive=False): t1 = time.time() - # FIXME, better write to disk sequentially and not keep tgz contents in memory tgz_path = os.path.join(dest_dir, name) if ref: ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") @@ -324,12 +325,15 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursiv with tarfile.open(tgz_path, "w:zst", level=compresslevel) as tar: # noqa Py314 only for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, 
recursive=recursive) + ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") return tgz_path if name.endswith("xz"): + # The default to PAX_FORMAT in case of Python 3.7 with tarfile.open(tgz_path, "w:xz", preset=compresslevel, format=tarfile.PAX_FORMAT) as tar: for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, recursive=recursive) + ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") return tgz_path with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle: @@ -339,8 +343,7 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursiv tgz.add(abs_path, filename, recursive=recursive) tgz.close() - duration = time.time() - t1 - ConanOutput().debug(f"{name} compressed in {duration} time") + ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") return tgz_path diff --git a/conan/internal/model/conf.py b/conan/internal/model/conf.py index a6ae7138975..ab59e754d23 100644 --- a/conan/internal/model/conf.py +++ b/conan/internal/model/conf.py @@ -63,7 +63,7 @@ # Compression for `conan upload` "core.upload:compression_format": "The compression format used when uploading Conan packages. " "Possible values: 'zst', 'xz', 'gz' (default=gz)", - "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9). 
Deprecated, use core:compresslevel", + "core.gzip:compresslevel": "The Gzip compression level for Conan artifacts (default=9)", "core:compresslevel": "The compression level for Conan artifacts (default zstd=3, gz=9)", # Excluded from revision_mode = "scm" dirty and Git().is_dirty() checks "core.scm:excluded": "List of excluded patterns for builtin git dirty checks", From 985186d0b8df0a2d54825c53ac4be1742bff57d0 Mon Sep 17 00:00:00 2001 From: memsharded Date: Wed, 14 Jan 2026 00:23:01 +0100 Subject: [PATCH 23/23] fix tests --- conan/internal/api/uploader.py | 15 ++++++++------- .../command/cache/test_cache_save_restore.py | 2 +- test/integration/test_compressions.py | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/conan/internal/api/uploader.py b/conan/internal/api/uploader.py index 9fa56f9c3f8..10f9877d556 100644 --- a/conan/internal/api/uploader.py +++ b/conan/internal/api/uploader.py @@ -222,7 +222,7 @@ def _compressed_file(self, filename, files, download_folder, ref): file_name = filename + self._compressformat package_file = os.path.join(download_folder, file_name) compressed_path = compress_files(files, file_name, download_folder, - compresslevel=self._compresslevel, ref=ref) + compresslevel=self._compresslevel, scope=str(ref)) assert compressed_path == package_file assert os.path.exists(package_file) return file_name @@ -315,17 +315,18 @@ def gzopen_without_timestamps(name, fileobj, compresslevel=None): return t -def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursive=False): +def compress_files(files, name, dest_dir, compresslevel=None, scope=None, recursive=False): t1 = time.time() tgz_path = os.path.join(dest_dir, name) - if ref: - ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}") + + out = ConanOutput(scope=scope) + out.info(f"Compressing {name}") if name.endswith("zst"): with tarfile.open(tgz_path, "w:zst", level=compresslevel) as tar: # noqa Py314 only for filename, abs_path in 
sorted(files.items()): tar.add(abs_path, filename, recursive=recursive) - ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") + out.debug(f"{name} compressed in {time.time() - t1} time") return tgz_path if name.endswith("xz"): @@ -333,7 +334,7 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursiv with tarfile.open(tgz_path, "w:xz", preset=compresslevel, format=tarfile.PAX_FORMAT) as tar: for filename, abs_path in sorted(files.items()): tar.add(abs_path, filename, recursive=recursive) - ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") + out.debug(f"{name} compressed in {time.time() - t1} time") return tgz_path with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle: @@ -343,7 +344,7 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursiv tgz.add(abs_path, filename, recursive=recursive) tgz.close() - ConanOutput().debug(f"{name} compressed in {time.time() - t1} time") + out.debug(f"{name} compressed in {time.time() - t1} time") return tgz_path diff --git a/test/integration/command/cache/test_cache_save_restore.py b/test/integration/command/cache/test_cache_save_restore.py index fa571993bea..6f301daa87f 100644 --- a/test/integration/command/cache/test_cache_save_restore.py +++ b/test/integration/command/cache/test_cache_save_restore.py @@ -310,7 +310,7 @@ def test_cache_save_restore_compressions(compress): client.run("create . --name=pkg --version=1.0 -s os=Linux") client.run(f"cache save pkg/*:* --file=mysave.t{compress}") if compress in ("xz", "zst"): - assert f"WARN: risk: The '{compress}' compression is experimental." in client.out + assert f"WARN: experimental: The '{compress}' compression is experimental." 
in client.out cache_path = os.path.join(client.current_folder, f"mysave.t{compress}") assert os.path.exists(cache_path) diff --git a/test/integration/test_compressions.py b/test/integration/test_compressions.py index 07c06a9f902..b4e8314c1a5 100644 --- a/test/integration/test_compressions.py +++ b/test/integration/test_compressions.py @@ -106,7 +106,7 @@ def test_duplicated_export(self): c.run("remove * -c") c.run("export") c.run("upload * -r=default -c -cc core.upload:compression_format=xz --force") - assert "WARN: risk: The 'xz' compression is experimental" in c.out + assert "WARN: experimental: The 'xz' compression is experimental" in c.out c.run("remove * -c") c.run("install --requires=pkg/0.1", assert_error=True)