diff --git a/conda_pypi/build.py b/conda_pypi/build.py index e4c3c272..158faf58 100644 --- a/conda_pypi/build.py +++ b/conda_pypi/build.py @@ -4,9 +4,7 @@ Create wheels from pypa projects. """ -import base64 import csv -import hashlib import itertools import json import os @@ -26,6 +24,7 @@ from conda_pypi import dependencies, installer, paths from conda_pypi.conda_build_utils import PathType, sha256_checksum from conda_pypi.translate import CondaMetadata +from conda_pypi.utils import sha256_as_base64url log = logging.getLogger(__name__) @@ -195,14 +194,8 @@ def update_RECORD(record_path: Path, base_path: Path, changed_path: Path): for row in record_rows: if row[0] == relpath: data = changed_path.read_bytes() - size = len(data) - checksum = ( - base64.urlsafe_b64encode(hashlib.sha256(data).digest()) - .rstrip(b"=") - .decode("utf-8") - ) - row[1] = f"sha256={checksum}" - row[2] = str(size) + row[1] = f"sha256={sha256_as_base64url(data)}" + row[2] = str(len(data)) with record_path.open(mode="w", newline="", encoding="utf-8") as record_file: writer = csv.writer(record_file) diff --git a/conda_pypi/installer.py b/conda_pypi/installer.py index 0e7c0a7f..a990772a 100644 --- a/conda_pypi/installer.py +++ b/conda_pypi/installer.py @@ -2,8 +2,6 @@ Install a wheel / install a conda. 
# Size in bytes of a raw SHA-256 digest; anything else is not a SHA-256 hash.
_SHA256_DIGEST_SIZE = 32


def hash_as_base64url(data: bytes, algorithm: str = "sha256") -> str:
    """Digest of ``data`` as PEP 376 RECORD style base64url (no padding).

    Args:
        data: Bytes to hash.
        algorithm: Algorithm name passed to ``hashlib.new``.

    Returns:
        The URL-safe base64 text of the digest with trailing ``=`` removed.

    Raises:
        ValueError: If ``hashlib`` does not know ``algorithm``.
    """
    digest = hashlib.new(algorithm, data).digest()
    return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")


def sha256_as_base64url(data: bytes) -> str:
    """SHA256 digest of ``data`` as PEP 376 RECORD style base64url (no padding)."""
    return hash_as_base64url(data, "sha256")


def sha256_base64url_to_hex(value: str | None) -> str | None:
    """Convert a base64url SHA-256 hash (e.g. from installer) to hex for conda paths.json.

    Args:
        value: URL-safe base64 text of a SHA-256 digest, with or without
            ``=`` padding. Surrounding whitespace is ignored.

    Returns:
        The digest as a 64-character lowercase hex string, or ``None`` when
        ``value`` is ``None``, blank, not valid base64url, or does not decode
        to exactly 32 bytes.
    """
    if not value:
        return None
    token = value.strip()
    if not token:
        return None

    # Restore the "=" padding that RECORD-style hashes omit so the decoder
    # accepts the value.
    token += "=" * (-len(token) % 4)
    try:
        # validate=True rejects characters outside the alphabet; the lenient
        # default would silently drop them and decode whatever is left.
        digest = base64.b64decode(token, altchars=b"-_", validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        return None

    # A digest of any other length cannot be SHA-256 and must not be written
    # under a "sha256" key.
    if len(digest) != _SHA256_DIGEST_SIZE:
        return None
    return digest.hex()
[path.get("_path") for path in paths_json.get("paths")] - break - - # Ensure that the path.json file matches the packages up paths - for path in paths_json_paths: - assert path in included_package_paths - - # Ensure that the process didn't create pyc files. - # This is mostly a regression test, in case "installer" was to change its behavior. - assert "__pycache__" not in path, "build_conda should not have created __pycache__" - assert not path.endswith(".pyc"), "build_conda should not have created .pyc files" + + paths_json = None + for tar, member in package_streaming.stream_conda_info(target_package_path): + if member.name == "info/paths.json": + paths_json = json.load(tar.extractfile(member)) + break + assert paths_json is not None + return target_package_path, paths_json + + +def test_build_conda_package_paths_and_sha256_format( + tmp_env: TmpEnvFixture, + pypi_demo_package_wheel_path: Path, + tmp_path: Path, +): + """Ensure paths match package and no pyc, and paths.json sha256 is hex.""" + target_package_path, paths_json = _build_demo_conda_and_paths( + tmp_env, pypi_demo_package_wheel_path, tmp_path + ) + paths_json_paths = [p.get("_path") for p in paths_json.get("paths", [])] + included_package_paths = { + mm.name for _, mm in package_streaming.stream_conda_component(target_package_path) + } + + # Paths in paths.json match package; no __pycache__ or .pyc + missing = [p for p in paths_json_paths if p not in included_package_paths] + assert not missing, f"paths.json paths not in package: {missing}" + with_pycache = [p for p in paths_json_paths if "__pycache__" in p] + assert not with_pycache, f"build_conda should not create __pycache__: {with_pycache}" + with_pyc = [p for p in paths_json_paths if p.endswith(".pyc")] + assert not with_pyc, f"build_conda should not create .pyc files: {with_pyc}" + + # Conda/solver expect sha256 in hex (not base64url from installer) + def is_hex_64(s): + return s and len(s) == 64 and all(c in "0123456789abcdef" for c in 
def test_hash_as_base64url_uses_specified_algorithm():
    """The ``algorithm`` argument selects the digest: MD5 output differs from SHA-256."""
    payload = b"hello"
    md5_digest = hashlib.md5(payload).digest()
    md5_token = base64.urlsafe_b64encode(md5_digest).decode("ascii").rstrip("=")

    actual = hash_as_base64url(payload, "md5")

    assert actual == md5_token
    assert actual != hash_as_base64url(payload, "sha256")


def test_sha256_as_base64url_has_no_padding():
    """The encoded SHA-256 digest carries no ``=`` padding (PEP 376 RECORD)."""
    assert "=" not in sha256_as_base64url(b"hello")


def test_sha256_base64url_to_hex_matches_digest_hex():
    """Converting an unpadded base64url digest yields the same digest in hex."""
    raw = hashlib.sha256(b"hello").digest()
    token = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    assert sha256_base64url_to_hex(token) == raw.hex()


def test_sha256_base64url_to_hex_returns_64_hex_chars():
    """The converted value is 64 characters drawn from lowercase hex digits."""
    raw = hashlib.sha256(b"x").digest()
    token = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    converted = sha256_base64url_to_hex(token)

    assert converted is not None
    assert len(converted) == 64
    assert set(converted) <= set("0123456789abcdef")


@pytest.mark.parametrize("value", [None, "", " "])
def test_sha256_base64url_to_hex_returns_none_for_falsy(value):
    """``None``, the empty string and a whitespace-only string all map to ``None``."""
    assert sha256_base64url_to_hex(value) is None


def test_sha256_base64url_to_hex_hex_acceptable_by_bytes_fromhex():
    """The hex output round-trips through ``bytes.fromhex`` to the raw digest."""
    content = b"any content"

    hex_digest = sha256_base64url_to_hex(sha256_as_base64url(content))

    assert hex_digest is not None
    assert bytes.fromhex(hex_digest) == hashlib.sha256(content).digest()