From 6ab542552b2363a1119cfa6113ca23402d7f13d2 Mon Sep 17 00:00:00 2001 From: M Bussonnier Date: Tue, 6 Aug 2024 14:12:16 +0200 Subject: [PATCH] fetch_string_and_headers compat: raise in and out of pyodide Currently only the not_in_pyodide variant will raise on non-success, because this is the default behavior of urllib; the in_pyodide variant will not, so I added a raise_for_status. It is better to raise, as otherwise the package parser will potentially get a proper URL and not manage to parse it, and decide there are no wheels, while we actually just got an error (404, or maybe 500). In addition, wrap both cases in a custom local HttpStatusError, so that we can actually catch these errors in the right places when we encounter them. Also add handling for PyPI 404. Now that warehouse sets CORS headers on 404 responses (https://github.com/pypi/warehouse/pull/16339), we need to change the checked exceptions as there are no more network errors. --- .github/workflows/main.yml | 2 ++ micropip/_compat/__init__.py | 3 ++ micropip/_compat/_compat_in_pyodide.py | 29 +++++++++++++++-- micropip/_compat/_compat_not_in_pyodide.py | 16 +++++++++- micropip/_compat/compatibility_layer.py | 8 +++++ micropip/package_index.py | 28 ++++++++++++++-- tests/conftest.py | 8 +++++ tests/test_compat.py | 37 ++++++++++++++++++++++ tests/test_package_index.py | 4 +-- 9 files changed, 127 insertions(+), 8 deletions(-) create mode 100644 tests/test_compat.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b68d295..edf208a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,9 @@ jobs: shell: bash -l {0} run: | pytest -v \ + --durations=10 \ --cov=micropip \ + --maxfail=5 \ --dist-dir=./dist/ \ --runner=${{ matrix.test-config.runner }} \ --rt ${{ matrix.test-config.runtime }} diff --git a/micropip/_compat/__init__.py b/micropip/_compat/__init__.py index 3934238..445e63d 100644 --- a/micropip/_compat/__init__.py +++ b/micropip/_compat/__init__.py @@ -34,6 +34,8 @@ to_js = 
compatibility_layer.to_js +HttpStatusError = compatibility_layer.HttpStatusError + __all__ = [ "REPODATA_INFO", @@ -45,4 +47,5 @@ "loadPackage", "get_dynlibs", "to_js", + "HttpStatusError", ] diff --git a/micropip/_compat/_compat_in_pyodide.py b/micropip/_compat/_compat_in_pyodide.py index 671fc44..58fb8f1 100644 --- a/micropip/_compat/_compat_in_pyodide.py +++ b/micropip/_compat/_compat_in_pyodide.py @@ -5,9 +5,21 @@ if TYPE_CHECKING: pass +import pyodide +from packaging.version import parse from pyodide._package_loader import get_dynlibs from pyodide.ffi import IN_BROWSER, to_js -from pyodide.http import pyfetch + +if parse(pyodide.__version__) > parse("0.27"): + from pyodide.http import HttpStatusError, pyfetch +else: + + class HttpStatusError(Exception): # type: ignore [no-redef] + """we just want this to be defined, it is never going to be raised""" + + pass + + from pyodide.http import pyfetch from .compatibility_layer import CompatibilityLayer @@ -28,6 +40,15 @@ class CompatibilityInPyodide(CompatibilityLayer): + class HttpStatusError(Exception): + status_code: int + message: str + + def __init__(self, status_code: int, message: str): + self.status_code = status_code + self.message = message + super().__init__(message) + @staticmethod def repodata_info() -> dict[str, str]: return REPODATA_INFO @@ -50,7 +71,11 @@ async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes: async def fetch_string_and_headers( url: str, kwargs: dict[str, str] ) -> tuple[str, dict[str, str]]: - response = await pyfetch(url, **kwargs) + try: + response = await pyfetch(url, **kwargs) + response.raise_for_status() + except HttpStatusError as e: + raise CompatibilityInPyodide.HttpStatusError(e.status, str(e)) from e content = await response.string() headers: dict[str, str] = response.headers diff --git a/micropip/_compat/_compat_not_in_pyodide.py b/micropip/_compat/_compat_not_in_pyodide.py index 5fb3cec..96cc53b 100644 --- a/micropip/_compat/_compat_not_in_pyodide.py +++ 
b/micropip/_compat/_compat_not_in_pyodide.py @@ -1,6 +1,7 @@ import re from pathlib import Path from typing import IO, TYPE_CHECKING, Any +from urllib.error import HTTPError from urllib.request import Request, urlopen from urllib.response import addinfourl @@ -15,6 +16,15 @@ class CompatibilityNotInPyodide(CompatibilityLayer): # Vendored from packaging _canonicalize_regex = re.compile(r"[-_.]+") + class HttpStatusError(Exception): + status_code: int + message: str + + def __init__(self, status_code: int, message: str): + self.status_code = status_code + self.message = message + super().__init__(message) + class loadedPackages(CompatibilityLayer.loadedPackages): @staticmethod def to_py(): @@ -40,7 +50,11 @@ async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes: async def fetch_string_and_headers( url: str, kwargs: dict[str, Any] ) -> tuple[str, dict[str, str]]: - response = CompatibilityNotInPyodide._fetch(url, kwargs=kwargs) + try: + response = CompatibilityNotInPyodide._fetch(url, kwargs=kwargs) + except HTTPError as e: + raise CompatibilityNotInPyodide.HttpStatusError(e.code, str(e)) from e + headers = {k.lower(): v for k, v in response.headers.items()} return response.read().decode(), headers diff --git a/micropip/_compat/compatibility_layer.py b/micropip/_compat/compatibility_layer.py index 07a0c37..b6c6c86 100644 --- a/micropip/_compat/compatibility_layer.py +++ b/micropip/_compat/compatibility_layer.py @@ -13,6 +13,14 @@ class CompatibilityLayer(ABC): All of the following methods / properties must be implemented for use both inside and outside of pyodide. 
""" + class HttpStatusError(ABC, Exception): + status_code: int + message: str + + @abstractmethod + def __init__(self, status_code: int, message: str): + pass + class loadedPackages(ABC): @staticmethod @abstractmethod diff --git a/micropip/package_index.py b/micropip/package_index.py index 2772b7b..f3d43e3 100644 --- a/micropip/package_index.py +++ b/micropip/package_index.py @@ -10,7 +10,7 @@ from packaging.utils import InvalidWheelFilename from packaging.version import InvalidVersion, Version -from ._compat import fetch_string_and_headers +from ._compat import HttpStatusError, fetch_string_and_headers from ._utils import is_package_compatible, parse_version from .externals.mousebender.simple import from_project_details_html from .wheelinfo import WheelInfo @@ -276,11 +276,33 @@ async def query_package( try: metadata, headers = await fetch_string_and_headers(url, _fetch_kwargs) + except HttpStatusError as e: + if e.status_code == 404: + continue + raise except OSError: - continue + # temporary pyodide compatibility. + # pypi now set proper CORS on 404 (https://github.com/pypi/warehouse/pull/16339), + # but stable pyodide (<0.27) does not yet have proper HttpStatusError exception + # so when: on pyodide and 0.26.x we ignore OSError. Once we drop support for 0.26 + # all this OSError except clause should just be gone. + try: + import pyodide + from packaging.version import parse + + if parse(pyodide.__version__) > parse("0.27"): + # reraise on more recent pyodide. + raise + continue + except ImportError: + # not in pyodide. 
+ raise content_type = headers.get("content-type", "").lower() - parser = _select_parser(content_type, name) + try: + parser = _select_parser(content_type, name) + except ValueError as e: + raise ValueError(f"Error trying to decode url: {url}") from e return parser(metadata) else: raise ValueError( diff --git a/tests/conftest.py b/tests/conftest.py index c53d723..6f597be 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,6 +72,9 @@ def selenium_standalone_micropip(selenium_standalone, wheel_path): if not wheel_files: pytest.exit("No wheel files found in wheel/ directory") + # Change default timeout to 30s instead of 10min + selenium_standalone.driver.implicitly_wait(11) + wheel_file = wheel_files[0] with spawn_web_server(wheel_dir) as server: server_hostname, server_port, _ = server @@ -340,6 +343,7 @@ def mock_fetch(monkeypatch, mock_importlib): def _mock_package_index_gen( httpserver, pkgs=("black", "pytest", "numpy", "pytz", "snowballstemmer"), + pkgs_not_found=(), content_type="application/json", suffix="_json.json.gz", ): @@ -355,6 +359,10 @@ def _mock_package_index_gen( content_type=content_type, headers={"Access-Control-Allow-Origin": "*"}, ) + for pkg in pkgs_not_found: + httpserver.expect_request(f"/{base}/{pkg}/").respond_with_data( + "Not found", status=404, content_type="text/plain" + ) index_url = httpserver.url_for(base) diff --git a/tests/test_compat.py b/tests/test_compat.py new file mode 100644 index 0000000..492831a --- /dev/null +++ b/tests/test_compat.py @@ -0,0 +1,37 @@ +""" +test that function in compati behave the same + +""" + +import pytest +from pytest_pyodide import run_in_pyodide + + +@pytest.mark.driver_timeout(10) +def test_404(selenium_standalone_micropip, httpserver, request): + selenium_standalone_micropip.set_script_timeout(11) + + @run_in_pyodide(packages=["micropip", "packaging"]) + async def _inner_test_404_raise(selenium, url): + import pyodide + import pytest + from packaging.version import parse + + from 
micropip._compat import HttpStatusError, fetch_string_and_headers + + if parse(pyodide.__version__) > parse("0.27"): + ExpectedErrorClass = HttpStatusError + else: + ExpectedErrorClass = OSError + + with pytest.raises(ExpectedErrorClass): + await fetch_string_and_headers(url, {}) + + httpserver.expect_request("/404").respond_with_data( + "Not found", + status=404, + content_type="text/plain", + headers={"Access-Control-Allow-Origin": "*"}, + ) + url_404 = httpserver.url_for("/404") + _inner_test_404_raise(selenium_standalone_micropip, url_404) diff --git a/tests/test_package_index.py b/tests/test_package_index.py index 9097877..5552f60 100644 --- a/tests/test_package_index.py +++ b/tests/test_package_index.py @@ -135,8 +135,8 @@ def test_contain_placeholder(): ) async def test_query_package(mock_fixture, pkg1, pkg2, request): gen_mock_server = request.getfixturevalue(mock_fixture) - pkg1_index_url = gen_mock_server(pkgs=[pkg1]) - pkg2_index_url = gen_mock_server(pkgs=[pkg2]) + pkg1_index_url = gen_mock_server(pkgs=[pkg1], pkgs_not_found=[pkg2]) + pkg2_index_url = gen_mock_server(pkgs=[pkg2], pkgs_not_found=[pkg1]) for _index_urls in ( pkg1_index_url,