From aac65f0b86c8af429a16503de357af1390131a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Mon, 2 May 2022 09:16:24 +0200 Subject: [PATCH 1/3] Add json_metadata --- src/pip/_internal/metadata/base.py | 15 ++++++ src/pip/_internal/metadata/json.py | 80 ++++++++++++++++++++++++++++ tests/unit/metadata/test_metadata.py | 22 +++++++- 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 src/pip/_internal/metadata/json.py diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index d226dec8b3f..a4055dae940 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -9,8 +9,10 @@ from typing import ( IO, TYPE_CHECKING, + Any, Collection, Container, + Dict, Iterable, Iterator, List, @@ -38,6 +40,8 @@ from pip._internal.utils.packaging import safe_extra from pip._internal.utils.urls import url_to_path +from .json import msg_to_json + if TYPE_CHECKING: from typing import Protocol else: @@ -379,6 +383,17 @@ def metadata(self) -> email.message.Message: """ return self._metadata_cached() + @property + def json_metadata(self) -> Dict[str, Any]: + """PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO. + + This should return an empty dict if the metadata file is unavailable. + + :raises NoneMetadataError: If the metadata file is available, but does + not contain valid metadata. + """ + return msg_to_json(self.metadata) + @property def metadata_version(self) -> Optional[str]: """Value of "Metadata-Version:" in distribution metadata, if available.""" diff --git a/src/pip/_internal/metadata/json.py b/src/pip/_internal/metadata/json.py new file mode 100644 index 00000000000..d5ab90742cb --- /dev/null +++ b/src/pip/_internal/metadata/json.py @@ -0,0 +1,80 @@ +# Extracted from https://github.com/pfmoore/pkg_metadata + +from email.header import Header, decode_header, make_header +from email.message import Message +from typing import Any, Dict, List, Union + +METADATA_FIELDS = [ + # Name, Multiple-Use + ("Metadata-Version", False), + ("Name", False), + ("Version", False), + ("Dynamic", True), + ("Platform", True), + ("Supported-Platform", True), + ("Summary", False), + ("Description", False), + ("Description-Content-Type", False), + ("Keywords", False), + ("Home-page", False), + ("Download-URL", False), + ("Author", False), + ("Author-email", False), + ("Maintainer", False), + ("Maintainer-email", False), + ("License", False), + ("Classifier", True), + ("Requires-Dist", True), + ("Requires-Python", False), + ("Requires-External", True), + ("Project-URL", True), + ("Provides-Extra", True), + ("Provides-Dist", True), + ("Obsoletes-Dist", True), +] + + +def json_name(field: str) -> str: + return field.lower().replace("-", "_") + + +def msg_to_json(msg: Message) -> Dict[str, Any]: + def sanitise_header(h: Union[Header, str]) -> str: + if isinstance(h, Header): + chunks = [] + for bytes, encoding in decode_header(h): + if encoding == "unknown-8bit": + try: + # See if UTF-8 works + bytes.decode("utf-8") + encoding = "utf-8" + except UnicodeDecodeError: + # If not, latin1 at least won't fail + encoding = "latin1" + chunks.append((bytes, encoding)) + return str(make_header(chunks)) + return str(h) + + result = {} + for field, multi in METADATA_FIELDS: + if field not in msg: + continue + key = json_name(field) + if multi: + value: Union[str, List[str]] = [ + sanitise_header(v) for v in msg.get_all(field) + ] + else: + value = sanitise_header(msg.get(field)) + if key == "keywords": + if "," in value: + value = [v.strip() for v in value.split(",")] + else: + value = value.split() + result[key] = value + + payload = msg.get_payload() + if payload: + result["description"] = payload + + return result diff --git a/tests/unit/metadata/test_metadata.py b/tests/unit/metadata/test_metadata.py index 57e09acf76f..bee6f7e5b19 100644 --- a/tests/unit/metadata/test_metadata.py +++ b/tests/unit/metadata/test_metadata.py @@ -6,8 +6,14 @@ import pytest from pip._vendor.packaging.utils import NormalizedName -from pip._internal.metadata import BaseDistribution, get_directory_distribution +from pip._internal.metadata import ( + BaseDistribution, + get_directory_distribution, + get_wheel_distribution, +) +from pip._internal.metadata.base import FilesystemWheel from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, ArchiveInfo +from tests.lib.wheel import make_wheel @mock.patch.object(BaseDistribution, "read_text", side_effect=FileNotFoundError) @@ -82,3 +88,17 @@ class FakeDistribution(BaseDistribution): mock_read_text.assert_called_once_with(DIRECT_URL_METADATA_NAME) assert direct_url.url == "https://e.c/p.tgz" assert isinstance(direct_url.info, ArchiveInfo) + + +def test_json_metadata(tmp_path: Path) -> None: + """Basic test of BaseDistribution json_metadata. + + More tests are available in the original pkg_metadata project where this + function comes from, and which we may vendor in the future. + """ + wheel_path = make_wheel(name="pkga", version="1.0.1").save_to_dir(tmp_path) + wheel = FilesystemWheel(wheel_path) + dist = get_wheel_distribution(wheel, "pkga") + json_metadata = dist.json_metadata + assert json_metadata["name"] == "pkga" + assert json_metadata["version"] == "1.0.1" From 675b4faa3093fc76fce4b7b9b9b314a949c3a418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Fri, 27 May 2022 10:19:59 +0200 Subject: [PATCH 2/3] Rename json.py to _json.py To make it explicit that it is a private implementation detail. --- src/pip/_internal/metadata/{json.py => _json.py} | 0 src/pip/_internal/metadata/base.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/pip/_internal/metadata/{json.py => _json.py} (100%) diff --git a/src/pip/_internal/metadata/json.py b/src/pip/_internal/metadata/_json.py similarity index 100% rename from src/pip/_internal/metadata/json.py rename to src/pip/_internal/metadata/_json.py diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index a4055dae940..2fd46f40360 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -40,7 +40,7 @@ from pip._internal.utils.packaging import safe_extra from pip._internal.utils.urls import url_to_path -from .json import msg_to_json +from ._json import msg_to_json if TYPE_CHECKING: from typing import Protocol From ae6737145061bb9d4cab9307a0840fc49f0202d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Tue, 21 Jun 2022 00:06:20 +0200 Subject: [PATCH 3/3] Rename json_metadata to metadata_dict --- src/pip/_internal/metadata/_json.py | 4 ++++ src/pip/_internal/metadata/base.py | 2 +- tests/unit/metadata/test_metadata.py | 10 +++++----- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/pip/_internal/metadata/_json.py b/src/pip/_internal/metadata/_json.py index d5ab90742cb..336b52f1efd 100644 --- a/src/pip/_internal/metadata/_json.py +++ b/src/pip/_internal/metadata/_json.py @@ -39,6 +39,8 @@ def json_name(field: str) -> str: def msg_to_json(msg: Message) -> Dict[str, Any]: + """Convert a Message object into a JSON-compatible dictionary.""" + def sanitise_header(h: Union[Header, str]) -> str: if isinstance(h, Header): chunks = [] @@ -67,6 +69,8 @@ def sanitise_header(h: Union[Header, str]) -> str: else: value = sanitise_header(msg.get(field)) if key == "keywords": + # Accept both comma-separated and space-separated + # forms, for better compatibility with old data. if "," in value: value = [v.strip() for v in value.split(",")] else: diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 2fd46f40360..2823b67540e 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -384,7 +384,7 @@ def metadata(self) -> email.message.Message: return self._metadata_cached() @property - def json_metadata(self) -> Dict[str, Any]: + def metadata_dict(self) -> Dict[str, Any]: """PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO. This should return an empty dict if the metadata file is unavailable. diff --git a/tests/unit/metadata/test_metadata.py b/tests/unit/metadata/test_metadata.py index bee6f7e5b19..0e8f3ffc1cc 100644 --- a/tests/unit/metadata/test_metadata.py +++ b/tests/unit/metadata/test_metadata.py @@ -90,8 +90,8 @@ class FakeDistribution(BaseDistribution): assert isinstance(direct_url.info, ArchiveInfo) -def test_json_metadata(tmp_path: Path) -> None: - """Basic test of BaseDistribution json_metadata. +def test_metadata_dict(tmp_path: Path) -> None: + """Basic test of BaseDistribution metadata_dict. More tests are available in the original pkg_metadata project where this function comes from, and which we may vendor in the future. @@ -99,6 +99,6 @@ def test_json_metadata(tmp_path: Path) -> None: wheel_path = make_wheel(name="pkga", version="1.0.1").save_to_dir(tmp_path) wheel = FilesystemWheel(wheel_path) dist = get_wheel_distribution(wheel, "pkga") - json_metadata = dist.json_metadata - assert json_metadata["name"] == "pkga" - assert json_metadata["version"] == "1.0.1" + metadata_dict = dist.metadata_dict + assert metadata_dict["name"] == "pkga" + assert metadata_dict["version"] == "1.0.1"