diff --git a/tests/test_processors.py b/tests/test_processors.py
index 87c6d263..dbda616e 100644
--- a/tests/test_processors.py
+++ b/tests/test_processors.py
@@ -7,12 +7,22 @@
from zyte_parsers import Gtin as zp_Gtin
from zyte_parsers import extract_breadcrumbs
-from zyte_common_items import AggregateRating, BasePage, Breadcrumb, Gtin, ProductPage
+from zyte_common_items import (
+ AggregateRating,
+ BasePage,
+ Brand,
+ Breadcrumb,
+ Gtin,
+ Image,
+ ProductPage,
+)
from zyte_common_items.processors import (
_format_price,
brand_processor,
breadcrumbs_processor,
gtin_processor,
+ images_processor,
+ price_processor,
rating_processor,
)
@@ -125,16 +135,18 @@ def breadcrumbs(self):
"input_value,expected_value",
[
(None, None),
- ("", ""),
- ("foo", "foo"),
+ ("", None),
+ (" ", None),
+ ("foo", Brand(name="foo")),
+ (" foo ", Brand(name="foo")),
(Selector(text=""), None),
(SelectorList([]), None),
- (fromstring("
foo
"), "foo"),
- (fromstring("
"), "foo"),
- (fromstring("![foo]()
"), "foo"),
- (fromstring("![foo]()
"), "foo"),
- (Selector(text="foo
"), "foo"),
- (SelectorList([Selector(text="foo
")]), "foo"),
+ (fromstring("foo
"), Brand(name="foo")),
+ (fromstring("
"), Brand(name="foo")),
+ (fromstring("![foo]()
"), Brand(name="foo")),
+ (fromstring("![foo]()
"), Brand(name="foo")),
+ (Selector(text="foo
"), Brand(name="foo")),
+ (SelectorList([Selector(text="foo
")]), Brand(name="foo")),
],
)
def test_brand(input_value, expected_value):
@@ -158,7 +170,7 @@ def brand(self):
body="
".encode(),
)
page = MyProductPage(response=response)
- assert page.brand == "foo"
+ assert page.brand == Brand(name="foo")
@pytest.mark.parametrize(
@@ -321,3 +333,85 @@ def aggregateRating(self):
assert page.aggregateRating == AggregateRating(
ratingValue=3.8, bestRating=10, reviewCount=5
)
+
+
+@pytest.mark.parametrize(
+    "input_value,expected_value",
+    [
+        (None, None),  # non-iterable input is passed through
+        ([], []),  # nothing to convert
+        ("https://www.url.com/img.jpg", [Image(url="https://www.url.com/img.jpg")]),
+        (  # Image objects are kept as they are
+            [
+                Image(url="https://www.url.com/img1.jpg"),
+                Image(url="https://www.url.com/img2.jpg"),
+            ],
+            [
+                Image(url="https://www.url.com/img1.jpg"),
+                Image(url="https://www.url.com/img2.jpg"),
+            ],
+        ),
+        (  # plain URL strings
+            ["https://www.url.com/img1.jpg", "https://www.url.com/img2.jpg"],
+            [
+                Image(url="https://www.url.com/img1.jpg"),
+                Image(url="https://www.url.com/img2.jpg"),
+            ],
+        ),
+        (  # mappings carrying a "url" key
+            [
+                {"url": "https://www.url.com/img1.jpg"},
+                {"url": "https://www.url.com/img2.jpg"},
+            ],
+            [
+                Image(url="https://www.url.com/img1.jpg"),
+                Image(url="https://www.url.com/img2.jpg"),
+            ],
+        ),
+    ],
+)
+def test_images(input_value, expected_value):
+    """Feed *input_value* through ``images_processor`` via a page field."""
+    class ImagesPage(BasePage):
+        @field(out=[images_processor])
+        def images(self):
+            return input_value
+
+    assert ImagesPage(base_url).images == expected_value  # type: ignore[arg-type]
+
+
+def test_images_page():
+    """``ProductPage`` runs ``images_processor`` on ``images`` without ``out=``."""
+    class MyProductPage(ProductPage):
+        @field
+        def images(self):
+            return self.css("img::attr(href)").getall()
+
+    # Markup rebuilt to match the selector above and the URL asserted below.
+    html = "<html><body><img href='https://www.url.com/img.jpg'></body></html>"
+    response = HttpResponse(url="http://www.example.com/", body=html.encode())
+    page = MyProductPage(response=response)
+    assert page.images == [Image(url="https://www.url.com/img.jpg")]
+
+
+@pytest.mark.parametrize(
+    "input_value,expected_value",
+    [
+        (100, "100.00"),  # integers gain two decimals
+        (None, None),  # everything that is not a number or a node passes through
+        ([], []),
+        ({}, {}),
+        (22.9, "22.90"),  # floats are padded to two decimals
+        (22.0, "22.00"),
+        ("22.9", "22.9"),  # numeric-looking text is still text
+        ("Do not apply to strings...", "Do not apply to strings..."),
+    ],
+)
+def test_prices(input_value, expected_value):
+    """Feed *input_value* through ``price_processor`` via a page field."""
+    class PricePage(BasePage):
+        @field(out=[price_processor])
+        def price(self):
+            return input_value
+
+    assert PricePage(base_url).price == expected_value  # type: ignore[arg-type]
diff --git a/tox.ini b/tox.ini
index 27bf6d3a..50f412cc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -73,7 +73,7 @@ commands = mypy zyte_common_items tests
[testenv:twinecheck]
basepython = python3
deps =
- twine==4.0.2
+ twine==5.1.1
build==0.10.0
commands =
python -m build --sdist
diff --git a/zyte_common_items/pages/product.py b/zyte_common_items/pages/product.py
index fc297629..45c1500a 100644
--- a/zyte_common_items/pages/product.py
+++ b/zyte_common_items/pages/product.py
@@ -19,6 +19,7 @@
description_html_processor,
description_processor,
gtin_processor,
+ images_processor,
price_processor,
rating_processor,
simple_price_processor,
@@ -46,6 +47,7 @@ class Processors(BasePage.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
+ images = [images_processor]
class ProductPage(
@@ -62,6 +64,7 @@ class Processors(Page.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
+ images = [images_processor]
@attrs.define
diff --git a/zyte_common_items/processors.py b/zyte_common_items/processors.py
index e4306c11..d803f5e1 100644
--- a/zyte_common_items/processors.py
+++ b/zyte_common_items/processors.py
@@ -1,5 +1,6 @@
-from collections.abc import Iterable
+from collections.abc import Iterable, Mapping
from functools import wraps
+from numbers import Real
from typing import Any, Callable, List, Optional, Union
from clear_html import clean_node, cleaned_node_to_html, cleaned_node_to_text
@@ -21,8 +22,10 @@
from .components import (
AggregateRating,
BaseMetadata,
+ Brand,
Breadcrumb,
Gtin,
+ Image,
ProbabilityRequest,
Request,
)
@@ -104,50 +107,79 @@ def _from_zp_breadcrumb(value: zp_Breadcrumb) -> Breadcrumb:
return results
-@only_handle_nodes
-def brand_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
+def brand_processor(value: Any, page: Any) -> Any:
"""Convert the data into a brand name if possible.
- Supported inputs are :class:`~parsel.selector.Selector`,
- :class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
- Other inputs are returned as is.
+    :class:`~parsel.selector.Selector`, :class:`~parsel.selector.SelectorList`
+    and :class:`~lxml.html.HtmlElement` inputs are searched for a brand name.
+
+    A string input is stripped of surrounding whitespace and used as the name.
+    A non-empty name becomes a :class:`~zyte_common_items.Brand`, else ``None``.
+
+    Any other input is returned unchanged.
"""
- return extract_brand_name(value, search_depth=2)
+    value = _handle_selectorlist(value)
+    if isinstance(value, str):
+        # Whitespace-only text carries no brand name.
+        return Brand(name=name) if (name := value.strip()) else None
-@only_handle_nodes
-def price_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
+    if not isinstance(value, (Selector, SelectorList, HtmlElement)):
+        # Neither text nor a node: nothing to extract, pass it through.
+        return value
+
+    # A falsy extraction result means no brand was found in the node.
+    name = extract_brand_name(value, search_depth=2)
+    return Brand(name=name) if name else None
+
+
+def price_processor(value: Any, page: Any) -> Any:
"""Convert the data into a price string if possible.
Uses the price-parser_ library.
Supported inputs are :class:`~parsel.selector.Selector`,
- :class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
+    :class:`~parsel.selector.SelectorList`, :class:`~lxml.html.HtmlElement` and
+    non-boolean numbers (formatted with two decimals, without touching ``page``).
Other inputs are returned as is.
Puts the parsed Price object into ``page._parsed_price``.
.. _price-parser: https://github.com/scrapinghub/price-parser
"""
- price = extract_price(value)
- page._parsed_price = price
- return _format_price(price)
+    value = _handle_selectorlist(value)
+    # bool is a subclass of int (hence a Real), but True/False is not a price.
+    if isinstance(value, Real) and not isinstance(value, bool):
+        return f"{value:.2f}"
+    if isinstance(value, (Selector, HtmlElement)):
+        price = extract_price(value)
+        page._parsed_price = price
+        return _format_price(price)
+    return value
-@only_handle_nodes
-def simple_price_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
+
+def simple_price_processor(value: Any, page: Any) -> Any:
"""Convert the data into a price string if possible.
Uses the price-parser_ library.
Supported inputs are :class:`~parsel.selector.Selector`,
- :class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
+    :class:`~parsel.selector.SelectorList`, :class:`~lxml.html.HtmlElement` and
+    non-boolean numbers (formatted with two decimals).
Other inputs are returned as is.
.. _price-parser: https://github.com/scrapinghub/price-parser
"""
- price = extract_price(value)
- return _format_price(price)
+    value = _handle_selectorlist(value)
+
+    # bool is a subclass of int (hence a Real), but True/False is not a price.
+    if isinstance(value, Real) and not isinstance(value, bool):
+        return f"{value:.2f}"
+    if isinstance(value, (Selector, HtmlElement)):
+        price = extract_price(value)
+        return _format_price(price)
+    return value
@only_handle_nodes
@@ -330,6 +362,37 @@ def aggregateRating(self):
return value
+def images_processor(value: Any, page: Any) -> Any:
+    """Convert the data into a list of :class:`~zyte_common_items.Image`
+    objects if possible.
+
+    A single string (used as the URL) or a single mapping with a ``"url"`` key
+    is treated as a one-item list. In any other iterable, strings, mappings
+    with a non-empty ``"url"`` and :class:`~zyte_common_items.Image` objects
+    are converted in order, and items of any other kind are skipped.
+
+    Non-iterable inputs are returned unchanged. *page* is not used.
+    """
+    # str and Mapping are themselves iterable (over characters / keys), so a
+    # lone one must be wrapped; iterating a dict would yield Image(url="url").
+    if isinstance(value, (str, Mapping)):
+        value = [value]
+    elif not isinstance(value, Iterable):
+        return value
+
+    results: List[Any] = []
+    for item in value:
+        if isinstance(item, Image):
+            results.append(item)
+        elif isinstance(item, Mapping):
+            if url := item.get("url"):
+                results.append(Image(url=url))
+        elif isinstance(item, str):
+            results.append(Image(url=item))
+
+    return results
+
+
def probability_request_list_processor(
request_list: List[Request],
) -> List[ProbabilityRequest]: