diff --git a/README.md b/README.md index 0a27a0c..12a4a63 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A simple development conda channel for testing repodata for conda-pypi. This will install the conda-pypi plugin and its dependencies: ```bash -conda install -n base 'conda-pypi>=0.5.0' 'conda-rattler-solver<0.0.6' +conda install -n base 'conda-pypi>=0.5.0' 'conda-rattler-solver>=0.6.0' ``` ### Configure the solver diff --git a/generate.py b/generate.py index 992ac0f..3e71443 100644 --- a/generate.py +++ b/generate.py @@ -11,13 +11,15 @@ import argparse import asyncio import json +from enum import Enum + try: from compression.zstd import compress as zstd_compress # Python 3.14+ except ImportError: from backports.zstd import compress as zstd_compress # type: ignore[no-redef] import httpx -import re import time +from packaging.markers import Marker from packaging.requirements import Requirement, InvalidRequirement from pathlib import Path from typing import Any @@ -30,6 +32,39 @@ _MAPPING_CACHE: dict[str, dict[str, str]] | None = None +class MarkerVar(str, Enum): + PYTHON_VERSION = "python_version" + PYTHON_FULL_VERSION = "python_full_version" + EXTRA = "extra" + SYS_PLATFORM = "sys_platform" + PLATFORM_SYSTEM = "platform_system" + OS_NAME = "os_name" + IMPLEMENTATION_NAME = "implementation_name" + PLATFORM_PYTHON_IMPLEMENTATION = "platform_python_implementation" + PLATFORM_MACHINE = "platform_machine" + + +class MarkerOp(str, Enum): + EQ = "==" + NE = "!=" + NOT_IN = "not in" + + +SYSTEM_TO_VIRTUAL_PACKAGE = { + "windows": "__win", + "win32": "__win", + "linux": "__linux", + "darwin": "__osx", + "cygwin": "__unix", +} + +OS_NAME_TO_VIRTUAL_PACKAGE = { + "nt": "__win", + "windows": "__win", + "posix": "__unix", +} + + async def load_grayskull_mapping() -> dict[str, dict[str, str]]: """Load grayskull PyPI to conda mapping from conda-pypi repository.""" global _MAPPING_CACHE @@ -77,18 +112,145 @@ def map_package_name(pypi_name: str) -> str: return 
normalized +def _marker_value(token: Any) -> str: + """Extract the textual value from packaging marker tokens.""" + return getattr(token, "value", str(token)) + + +def _normalize_marker_clause( + marker_name: str, op: str, marker_value: str +) -> str | None: + """Map a single PEP 508 marker atom to a MatchSpec-like fragment. + + Examples: + - ("sys_platform", "==", "win32") -> "__win" + - ("python_version", "<", "3.11") -> "python<3.11" + - ("python_version", "not in", "3.0, 3.1") -> "(python!=3.0 and python!=3.1)" + - ("implementation_name", "==", "cpython") -> None + """ + marker_name = marker_name.lower() + marker_value = marker_value.lower() + + if marker_name in {MarkerVar.PYTHON_VERSION, MarkerVar.PYTHON_FULL_VERSION}: + if op == MarkerOp.NOT_IN: + excluded_versions = [ + version.strip() + for version in marker_value.split(",") + if version.strip() + ] + if not excluded_versions: + return None + clauses = [f"python!={version}" for version in excluded_versions] + if len(clauses) == 1: + return clauses[0] + return f"({' and '.join(clauses)})" + return f"python{op}{marker_value}" + + if marker_name == MarkerVar.EXTRA and op == MarkerOp.EQ: + return None + + if marker_name in {MarkerVar.SYS_PLATFORM, MarkerVar.PLATFORM_SYSTEM}: + mapped = SYSTEM_TO_VIRTUAL_PACKAGE.get(marker_value) + if op == MarkerOp.EQ and mapped: + return mapped + if op == MarkerOp.NE and marker_value in {"win32", "windows", "cygwin"}: + return "__unix" + if op == MarkerOp.NE and marker_value == "emscripten": + return None + return None + + if marker_name == MarkerVar.OS_NAME: + mapped = OS_NAME_TO_VIRTUAL_PACKAGE.get(marker_value) + if not mapped: + return None + if op == MarkerOp.EQ: + return mapped + if op == MarkerOp.NE: + return "__unix" if mapped == "__win" else "__win" + return None + + if marker_name in { + MarkerVar.IMPLEMENTATION_NAME, + MarkerVar.PLATFORM_PYTHON_IMPLEMENTATION, + }: + if marker_value in {"cpython", "pypy", "jython"}: + return None + return None + + if marker_name == 
MarkerVar.PLATFORM_MACHINE: + return None + + return None + + +def _combine_conditions(left: str | None, op: str, right: str | None) -> str | None: + """Combine optional left/right expressions with a boolean operator.""" + if left is None: + return right + if right is None: + return left + if left == right: + return left + return f"({left} {op} {right})" + + +def extract_marker_condition_and_extras(marker: Marker) -> tuple[str | None, list[str]]: + """Split a Marker into optional non-extra condition and extra group names. + + Examples: + - `extra == "docs"` -> `(None, ["docs"])` + - `python_version < "3.11" and extra == "test"` -> `("python<3.11", ["test"])` + - `sys_platform == "win32"` -> `("__win", [])` + """ + extras: list[str] = [] + seen_extras: set[str] = set() + + def parse_marker_node(node: Any) -> str | None: + if isinstance(node, tuple) and len(node) == 3: + marker_name = _marker_value(node[0]) + op = _marker_value(node[1]) + marker_value = _marker_value(node[2]) + + if marker_name.lower() == MarkerVar.EXTRA and op == MarkerOp.EQ: + extra_name = marker_value.lower() + if extra_name not in seen_extras: + seen_extras.add(extra_name) + extras.append(extra_name) + return None + + return _normalize_marker_clause(marker_name, op, marker_value) + + if isinstance(node, list): + if not node: + return None + + condition_expr = parse_marker_node(node[0]) + for op, rhs in zip(node[1::2], node[2::2]): + right_condition = parse_marker_node(rhs) + condition_expr = _combine_conditions( + condition_expr, str(op).lower(), right_condition + ) + return condition_expr + + return None + + # Marker._markers is private in packaging; keep usage isolated here. + condition = parse_marker_node(getattr(marker, "_markers", [])) + return condition, extras + + def pypi_to_repodata_whl_entry( pypi_data: dict[str, Any], url_index: int = 0 ) -> dict[str, Any] | None: """ - Convert PyPI JSON endpoint data to a repodata.json packages.whl entry. 
+ Convert PyPI JSON endpoint data to a repodata.json v3.whl entry. Args: pypi_data: Dictionary containing the complete info section from PyPI JSON endpoint url_index: Index of the wheel URL to use (typically the first one is the wheel) Returns: - Dictionary representing the entry for packages.whl, or None if wheel not found + Dictionary representing the entry for v3.whl, or None if wheel not found """ # Find a pure Python wheel (platform tag must be "none-any"). # Wheels with compiled native code use platform-specific tags such as @@ -115,9 +277,9 @@ def pypi_to_repodata_whl_entry( conda_name = map_package_name(pypi_name) version = pypi_info.get("version") - # Build dependency list and extras dict with name mapping + # Build dependency list and optional dependency groups with name mapping depends_list = [] - extras_dict: dict[str, list[str]] = {} + extra_depends_dict: dict[str, list[str]] = {} for dep in pypi_info.get("requires_dist") or []: try: req = Requirement(dep) @@ -128,13 +290,22 @@ def pypi_to_repodata_whl_entry( conda_dep = map_package_name(req.name) + str(req.specifier) if req.marker: - extra_match = re.search( - r'extra\s*==\s*["\']([^"\']+)["\']', str(req.marker) + non_extra_condition, extra_names = extract_marker_condition_and_extras( + req.marker ) - if extra_match: - extras_dict.setdefault(extra_match.group(1), []).append(conda_dep) + if extra_names: + for extra_name in extra_names: + extra_dep = conda_dep + if non_extra_condition: + marker_condition = json.dumps(non_extra_condition) + extra_dep = f"{extra_dep}[when={marker_condition}]" + extra_depends_dict.setdefault(extra_name, []).append(extra_dep) else: - depends_list.append(conda_dep) + if non_extra_condition: + marker_condition = json.dumps(non_extra_condition) + depends_list.append(f"{conda_dep}[when={marker_condition}]") + else: + depends_list.append(conda_dep) else: depends_list.append(conda_dep) @@ -146,9 +317,6 @@ def pypi_to_repodata_whl_entry( # Noarch python packages should still 
depend on python when PyPI omits requires_python depends_list.append("python") - # Extract filename components - filename = wheel_url.get("filename", "") - # Build the repodata entry entry = { "url": wheel_url.get("url", ""), @@ -158,7 +326,7 @@ def pypi_to_repodata_whl_entry( "build": "py3_none_any_0", "build_number": 0, "depends": depends_list, - "extras": extras_dict, + "extra_depends": extra_depends_dict, "sha256": wheel_url.get("digests", {}).get("sha256", ""), "size": wheel_url.get("size", 0), "subdir": "noarch", @@ -377,9 +545,9 @@ async def fetch_with_semaphore( "packages": {}, "packages.conda": {}, "removed": [], - "repodata_version": 1, + "repodata_version": 3, "signatures": {}, - "packages.whl": {key: value for key, value in sorted(pkg_whls.items())}, + "v3": {"whl": {key: value for key, value in sorted(pkg_whls.items())}}, } # Create output directory diff --git a/test_generate.py b/test_generate.py index 88376b4..4d150b6 100644 --- a/test_generate.py +++ b/test_generate.py @@ -106,22 +106,22 @@ def test_pypi_to_repodata_whl_entry_with_extras(): entry = pypi_to_repodata_whl_entry(pypi_data) assert entry is not None - assert "extras" in entry + assert "extra_depends" in entry - extras = entry["extras"] + extra_depends = entry["extra_depends"] # Each declared extra should appear as a key - assert "asyncio" in extras - assert "http2" in extras - assert "brotli" in extras - assert "socks" in extras + assert "asyncio" in extra_depends + assert "http2" in extra_depends + assert "brotli" in extra_depends + assert "socks" in extra_depends # Each extra's dep list should contain the right package name - assert any("anyio" in dep for dep in extras["asyncio"]) - assert any("h2" in dep for dep in extras["http2"]) - assert any("brotli" in dep for dep in extras["brotli"]) - assert any("socksio" in dep for dep in extras["socks"]) + assert any("anyio" in dep for dep in extra_depends["asyncio"]) + assert any("h2" in dep for dep in extra_depends["http2"]) + assert 
def test_repodata_structure():
    """Test that repodata.json has the correct structure (repodata v3)."""
    repo_root = Path(__file__).parent
    repodata_file = repo_root / "noarch" / "repodata.json"

    # Pin the encoding so the read does not depend on the platform default.
    with open(repodata_file, encoding="utf-8") as f:
        repodata = json.load(f)

    # Check required top-level keys (v3 format)
    required_keys = [
        "info",
        "packages",
        "packages.conda",
        "repodata_version",
        "v3",
    ]
    for key in required_keys:
        assert key in repodata, f"Missing required key: {key}"

    # Check structure: wheel entries live under the nested "v3" -> "whl" mapping
    assert "whl" in repodata["v3"]
    assert isinstance(repodata["v3"]["whl"], dict)
    assert repodata["info"]["subdir"] == "noarch"
    assert repodata["repodata_version"] == 3

    # Check that we have at least one package
    assert len(repodata["v3"]["whl"]) > 0, "No packages found in repodata"
def test_repodata_extra_depends_field_present():
    """Test that every package entry in repodata.json has an 'extra_depends' field."""
    repo_root = Path(__file__).parent
    repodata_file = repo_root / "noarch" / "repodata.json"

    # Pin the encoding so the read does not depend on the platform default.
    with open(repodata_file, encoding="utf-8") as f:
        repodata = json.load(f)

    packages = repodata["v3"]["whl"]
    assert len(packages) > 0, "No packages to test"

    for key, entry in packages.items():
        assert "extra_depends" in entry, (
            f"Package {key} is missing 'extra_depends' field"
        )
        assert isinstance(entry["extra_depends"], dict), (
            f"Package {key} 'extra_depends' is not a dict"
        )
def test_repodata_depends_and_extra_depends_structure():
    """Check that depends is a list and extra_depends is a dict of lists."""
    repo_root = Path(__file__).parent
    repodata_file = repo_root / "noarch" / "repodata.json"

    # Pin the encoding so the read does not depend on the platform default.
    with open(repodata_file, encoding="utf-8") as f:
        repodata = json.load(f)

    packages = repodata["v3"]["whl"]

    for key, entry in packages.items():
        assert isinstance(entry.get("depends"), list), (
            f"Package {key}: depends must be a list"
        )
        assert isinstance(entry.get("extra_depends"), dict), (
            f"Package {key}: extra_depends must be a dict"
        )
        # Each extra group maps its name to a list of dependency specs.
        for extra_name, deps in entry.get("extra_depends", {}).items():
            assert isinstance(deps, list), (
                f"Package {key} extra {extra_name}: deps must be a list"
            )
" + packages = repodata["v3"]["whl"] + packages_with_extra_depends = [ + k for k, v in packages.items() if v.get("extra_depends") + ] + assert len(packages_with_extra_depends) > 0, ( + "No packages with extra_depends found in repodata. " "Ensure packages-test.txt includes packages that declare extras " "(e.g. httpx, requests)." )