Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions conda_pypi/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
Create wheels from pypa projects.
"""

import base64
import csv
import hashlib
import itertools
import json
import os
Expand All @@ -26,6 +24,7 @@
from conda_pypi import dependencies, installer, paths
from conda_pypi.conda_build_utils import PathType, sha256_checksum
from conda_pypi.translate import CondaMetadata
from conda_pypi.utils import sha256_as_base64url


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -195,14 +194,8 @@ def update_RECORD(record_path: Path, base_path: Path, changed_path: Path):
for row in record_rows:
if row[0] == relpath:
data = changed_path.read_bytes()
size = len(data)
checksum = (
base64.urlsafe_b64encode(hashlib.sha256(data).digest())
.rstrip(b"=")
.decode("utf-8")
)
row[1] = f"sha256={checksum}"
row[2] = str(size)
row[1] = f"sha256={sha256_as_base64url(data)}"
row[2] = str(len(data))

with record_path.open(mode="w", newline="", encoding="utf-8") as record_file:
writer = csv.writer(record_file)
Expand Down
10 changes: 3 additions & 7 deletions conda_pypi/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Install a wheel / install a conda.
"""

import base64
import hashlib
import os
import subprocess
import tempfile
Expand All @@ -18,6 +16,8 @@
from installer.records import Hash, RecordEntry
from installer.sources import WheelFile

from conda_pypi.utils import hash_as_base64url

log = logging.getLogger(__name__)


Expand All @@ -38,11 +38,7 @@ def write_to_fs(self, scheme, path, stream, is_executable):
if os.path.exists(target_path):
log.debug(f"Skipping already-installed file: {target_path}")
data = Path(target_path).read_bytes()
digest = (
base64.urlsafe_b64encode(hashlib.new(self.hash_algorithm, data).digest())
.decode("ascii")
.rstrip("=")
)
digest = hash_as_base64url(data, self.hash_algorithm)
return RecordEntry(path, Hash(self.hash_algorithm, digest), len(data))
return super().write_to_fs(scheme, path, stream, is_executable)

Expand Down
4 changes: 3 additions & 1 deletion conda_pypi/package_extractors/whl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from typing import Literal, BinaryIO, Iterable, Tuple

from conda_pypi.utils import sha256_base64url_to_hex


SUPPORTED_SCEMES: Tuple[Scheme] = ("platlib", "purelib")

Expand Down Expand Up @@ -103,7 +105,7 @@ def _create_conda_metadata(
path = {
"_path": f"site-packages/{record.path}",
"path_type": "hardlink",
"sha256": record.hash_.value,
"sha256": sha256_base64url_to_hex(record.hash_.value if record.hash_ else None),
"size_in_bytes": record.size,
}
paths.append(path)
Expand Down
27 changes: 27 additions & 0 deletions conda_pypi/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import base64
import hashlib
import os
import sys

Expand All @@ -14,6 +16,31 @@
logger = getLogger(f"conda.{__name__}")


def hash_as_base64url(data: bytes, algorithm: str = "sha256") -> str:
    """Return the digest of *data* as unpadded URL-safe base64 text.

    This is the encoding used for hashes in PEP 376 RECORD files.
    *algorithm* is any name accepted by ``hashlib.new``; it defaults to
    ``"sha256"``.
    """
    hasher = hashlib.new(algorithm)
    hasher.update(data)
    encoded = base64.urlsafe_b64encode(hasher.digest())
    # RECORD hashes are written without the trailing "=" padding.
    return encoded.decode("ascii").rstrip("=")


def sha256_as_base64url(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as unpadded URL-safe base64.

    Convenience wrapper around ``hash_as_base64url`` with the algorithm
    fixed to SHA-256, the form used for PEP 376 RECORD entries.
    """
    return hash_as_base64url(data, algorithm="sha256")


def sha256_base64url_to_hex(value: str | None) -> str | None:
    """Convert a base64url SHA-256 hash (e.g. from installer) to hex.

    Conda's ``paths.json`` stores ``sha256`` as lowercase hex, whereas
    wheel RECORD entries use URL-safe base64 without padding.

    Args:
        value: URL-safe base64 text, with or without ``=`` padding.
            Surrounding whitespace is ignored.

    Returns:
        The 64-character lowercase hex digest, or ``None`` when *value*
        is ``None``, empty, not valid base64, or does not decode to a
        SHA-256 sized (32-byte) digest.
    """
    if not value:
        return None
    # Drop any padding that is present so the correct amount can be re-added;
    # installer strips it, but other producers may not.
    token = value.strip().rstrip("=")
    if not token:
        return None
    padded = token + "=" * (-len(token) % 4)
    try:
        # validate=True rejects characters outside the alphabet instead of
        # silently discarding them, which would yield a truncated digest.
        raw = base64.b64decode(padded, altchars=b"-_", validate=True)
    except ValueError:  # binascii.Error is a ValueError subclass
        return None
    # Anything that is not 32 bytes cannot be a SHA-256 digest; emitting it
    # under a "sha256" key would produce a checksum that can never match.
    if len(raw) != hashlib.sha256().digest_size:
        return None
    return raw.hex()


def get_prefix(prefix: os.PathLike = None, name: str = None) -> Path:
if prefix:
return Path(prefix)
Expand Down
71 changes: 45 additions & 26 deletions tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,65 @@
from conda_pypi.build import build_conda


def test_build_conda_package(
def _build_demo_conda_and_paths(
tmp_env: TmpEnvFixture,
pypi_demo_package_wheel_path: Path,
tmp_path: Path,
):
"""Build demo package from wheel; return (target_package_path, paths_json)."""
build_path = tmp_path / "build"
build_path.mkdir()

repo_path = tmp_path / "repo"
repo_path.mkdir()

target_package_path = repo_path / "demo-package-0.1.0-pypi_0.conda"

with tmp_env("python=3.12", "pip") as prefix:
conda_package_path = build_conda(
build_conda(
pypi_demo_package_wheel_path,
build_path,
repo_path,
Path(prefix, get_python_short_path()),
is_editable=False,
)
assert conda_package_path is not None

# Get a list of all the files in the package
included_package_paths = [
mm.name for _, mm in package_streaming.stream_conda_component(target_package_path)
]

# Get the list of all the paths listed in the paths.json file
for tar, member in package_streaming.stream_conda_info(target_package_path):
if member.name == "info/paths.json":
paths_json = json.load(tar.extractfile(member))
paths_json_paths = [path.get("_path") for path in paths_json.get("paths")]
break

# Ensure that the path.json file matches the packages up paths
for path in paths_json_paths:
assert path in included_package_paths

# Ensure that the process didn't create pyc files.
# This is mostly a regression test, in case "installer" was to change its behavior.
assert "__pycache__" not in path, "build_conda should not have created __pycache__"
assert not path.endswith(".pyc"), "build_conda should not have created .pyc files"

paths_json = None
for tar, member in package_streaming.stream_conda_info(target_package_path):
if member.name == "info/paths.json":
paths_json = json.load(tar.extractfile(member))
break
assert paths_json is not None
return target_package_path, paths_json


def test_build_conda_package_paths_and_sha256_format(
    tmp_env: TmpEnvFixture,
    pypi_demo_package_wheel_path: Path,
    tmp_path: Path,
):
    """Check paths.json against the built package.

    Every listed path must exist in the package, no bytecode may be listed,
    and every sha256 that is present must be 64-character hex.
    """
    package_path, paths_json = _build_demo_conda_and_paths(
        tmp_env, pypi_demo_package_wheel_path, tmp_path
    )
    entries = paths_json.get("paths", [])
    listed = [entry.get("_path") for entry in entries]
    packaged = {
        member.name for _, member in package_streaming.stream_conda_component(package_path)
    }

    # Everything paths.json lists must actually be shipped in the package.
    missing = [path for path in listed if path not in packaged]
    assert not missing, f"paths.json paths not in package: {missing}"

    # Regression guard: the build must not emit compiled bytecode.
    with_pycache = [path for path in listed if "__pycache__" in path]
    assert not with_pycache, f"build_conda should not create __pycache__: {with_pycache}"
    with_pyc = [path for path in listed if path.endswith(".pyc")]
    assert not with_pyc, f"build_conda should not create .pyc files: {with_pyc}"

    # Conda/solver expect sha256 in hex (not base64url from installer).
    hex_digits = set("0123456789abcdef")

    def is_hex_64(s):
        return s and len(s) == 64 and set(s.lower()) <= hex_digits

    bad = []
    for entry in entries:
        digest = entry.get("sha256")
        # Entries without a sha256 are skipped; only present values are checked.
        if digest and not is_hex_64(digest):
            bad.append((entry.get("_path"), digest))
    assert not bad, f"path sha256 must be 64-char hex: {bad}"
63 changes: 62 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,18 @@

from __future__ import annotations

import base64

import hashlib

import pytest
from conda_pypi.utils import pypi_spec_variants

from conda_pypi.utils import (
hash_as_base64url,
pypi_spec_variants,
sha256_as_base64url,
sha256_base64url_to_hex,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -34,3 +44,54 @@ def test_pypi_spec_variants_creates_name_variants():
variants = list(pypi_spec_variants("setuptools-scm"))
assert "setuptools-scm" in variants
assert "setuptools_scm" in variants


def test_hash_as_base64url_uses_specified_algorithm():
    """An explicit algorithm argument selects the digest instead of SHA-256."""
    payload = b"hello"
    md5_digest = hashlib.md5(payload).digest()
    want = base64.urlsafe_b64encode(md5_digest).decode("ascii").rstrip("=")

    got = hash_as_base64url(payload, "md5")

    assert got == want
    # The MD5 result must differ from what SHA-256 gives for the same payload.
    assert got != hash_as_base64url(payload, "sha256")


def test_sha256_as_base64url_has_no_padding():
    """The PEP 376 RECORD form produced by sha256_as_base64url carries no '='."""
    encoded = sha256_as_base64url(b"hello")
    assert "=" not in encoded


def test_sha256_base64url_to_hex_matches_digest_hex():
    """Converting an unpadded base64url digest yields the same digest as hex."""
    raw = hashlib.sha256(b"hello").digest()
    unpadded = base64.urlsafe_b64encode(raw).decode("ascii").rstrip("=")

    assert sha256_base64url_to_hex(unpadded) == raw.hex()


def test_sha256_base64url_to_hex_returns_64_hex_chars():
    """The converted value is exactly 64 lowercase hexadecimal characters."""
    raw = hashlib.sha256(b"x").digest()
    unpadded = base64.urlsafe_b64encode(raw).decode("ascii").rstrip("=")

    result = sha256_base64url_to_hex(unpadded)

    assert result is not None
    assert len(result) == 64
    assert set(result) <= set("0123456789abcdef")


@pytest.mark.parametrize("value", [None, "", " "])
def test_sha256_base64url_to_hex_returns_none_for_falsy(value):
    """None, the empty string and whitespace-only input all convert to None."""
    result = sha256_base64url_to_hex(value)
    assert result is None


def test_sha256_base64url_to_hex_hex_acceptable_by_bytes_fromhex():
    """The hex output round-trips through bytes.fromhex to the raw digest."""
    content = b"any content"

    as_hex = sha256_base64url_to_hex(sha256_as_base64url(content))

    assert as_hex is not None
    assert bytes.fromhex(as_hex) == hashlib.sha256(content).digest()
Loading