14 changes: 7 additions & 7 deletions tests/test_pages_price.py
@@ -62,21 +62,21 @@ def price(self):
url = "https://example.com"
page = CustomProductPage(response=HttpResponse(url=url, body=html))
assert page.call_count == 0
assert page.price == "$13.2"
assert page.price == "13.20"
@kmike (Contributor), Aug 21, 2024:

This is a big change, because it goes against the documentation - see https://zyte-common-items.readthedocs.io/en/latest/usage/field-processors.html#overview:

By design, the processors enabled by default are “transparent”: they don’t change the output of the field if the result is of the expected final type. For example, if there is a str attribute in the item, and the field returns str value, the default processor returns the value as-is.

This design decision was made for two reasons:

  1. It makes it possible to safely add new default processors without the risk of breaking user code. If the code was working before a processor was introduced, the field was already returning the final data type; the processor won't touch it, so nothing can break.
  2. It provides an escape hatch for the user: if you want to return final data, you can do so simply by returning a value of the final type.
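The transparency contract described above can be sketched as follows; this is a minimal illustration, not the library's actual code, and the `Brand` class here is a hypothetical stand-in for `zyte_common_items.Brand`:

```python
from dataclasses import dataclass
from typing import Any


@dataclass
class Brand:
    """Stand-in for zyte_common_items.Brand, for illustration only."""

    name: str


def transparent_brand_processor(value: Any) -> Any:
    """A "transparent" default processor: converts raw values, but returns
    values that already have the final type unchanged."""
    if isinstance(value, Brand):
        # Final type: returned as-is, so adding this processor cannot break
        # code whose fields already return Brand objects.
        return value
    if isinstance(value, str):
        # Raw value: convert (an empty string maps to no brand).
        return Brand(name=value) if value else None
    return value
```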

It's not an issue for Real support in price parsing, for Brand processing, or for Images (it all looks good in this PR), but it's an issue for price fields.

Obviously, this "design principle" has a downside: in most cases (?), we do want to process strings by default. I think most large Scrapy projects develop this kind of utility over time, where all values are processed by default. We do want to make that more standard.

I can see 2 ways to address it.

  1. Start processing final data types by default; drop this design principle.
  2. Introduce more aggressive / complete versions of the processors. Keep the "cautious" processors, which don't handle final data types, enabled by default, and make it easy to enable the complete versions: either by defining a Processors class which users can use, or by providing another base ProductPage class with processing of final values enabled by default.
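Option (2) could look roughly like the sketch below. All names and the `processors` registry shape are hypothetical, not actual zyte-common-items API:

```python
from typing import Any, Callable, Dict, List


def cautious_price_processor(value: Any) -> Any:
    # "Transparent": a str is already the final type, so it is returned as-is.
    if isinstance(value, str):
        return value
    return f"{float(value):.2f}"


def complete_price_processor(value: Any) -> Any:
    # Also normalizes final-typed (str) values, e.g. "$13.2" -> "13.20".
    if isinstance(value, str):
        value = value.lstrip("$").replace(",", "")
    return f"{float(value):.2f}"


class ProductPage:
    # Cautious processors stay enabled by default.
    processors: Dict[str, List[Callable]] = {"price": [cautious_price_processor]}


class CompleteProductPage(ProductPage):
    # Opt-in base class with processing of final values enabled by default.
    processors = {"price": [complete_price_processor]}
```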

I'm +0.5 for option (2), but willing to hear the opinions and arguments. I like keeping the "safe" approach, but it introduces more complexity (e.g. an additional base class, where users need to pick the right one).

cc @wRAR @Gallaecio.

Contributor Author:

I'm kind of against this transparency principle, and I've even proposed another PR #101 to ensure all strings are trimmed.

It's difficult for me to imagine why we would want to emit data with extra whitespace, and it's bothersome to always manually add some sort of default text cleaning processor to every field that needs it. I think this kind of processing should be applied by default at some point; it would help ensure that all our output data respects a common standard.
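The default text cleaning argued for here could be as simple as the sketch below (my own assumption, not the #101 implementation):

```python
from typing import Any


def strip_processor(value: Any) -> Any:
    """Trim surrounding whitespace from every string output; leave
    non-string values untouched."""
    return value.strip() if isinstance(value, str) else value
```

Enabled by default on every field, a processor like this would guarantee that no field emits data with leading or trailing whitespace.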

Contributor Author:

Decided to revert this change for now, to not block the rest.

assert page.call_count == 1
assert page.currency is None
assert await page.currencyRaw is None
assert page.call_count == 2 # we want this to be 1
assert await page.currencyRaw is None
assert page.call_count == 2 # we want this to be 1
assert await page.currencyRaw == "$"
assert page.call_count == 1 # we want this to be 1
Member:

I wonder why the count changed, and what the new expectation in the comment should be.

Contributor Author:

Yes, I've noticed that the comment info doesn't match the test values in some places on master. Not sure whether this is the proper behavior.

On master, such a difference can be found in 5 places:

This PR fixes two of these:

It seems like an improvement, albeit an incomplete one (but fixing this for all cases seems out of scope, so perhaps we can live with it for now...?)

Contributor Author:

Reverted parsing strings in price_processor, as noted in #99 (comment), so this is no longer relevant: this PR no longer modifies test_pages_price.py.

assert await page.currencyRaw == "$"
assert page.call_count == 1 # we want this to be 1

# access currency fields before the price field
page = CustomProductPage(response=HttpResponse(url=url, body=html))
assert page.call_count == 0
assert page.currency is None
assert await page.currencyRaw is None
assert await page.currencyRaw == "$"
assert page.call_count == 1
assert page.price == "$13.2"
assert page.price == "13.20"
assert page.call_count == 2 # we want this to be 1


105 changes: 94 additions & 11 deletions tests/test_processors.py
@@ -7,12 +7,22 @@
from zyte_parsers import Gtin as zp_Gtin
from zyte_parsers import extract_breadcrumbs

from zyte_common_items import AggregateRating, BasePage, Breadcrumb, Gtin, ProductPage
from zyte_common_items import (
AggregateRating,
BasePage,
Brand,
Breadcrumb,
Gtin,
Image,
ProductPage,
)
from zyte_common_items.processors import (
_format_price,
brand_processor,
breadcrumbs_processor,
gtin_processor,
images_processor,
price_processor,
rating_processor,
)

@@ -125,16 +135,16 @@ def breadcrumbs(self):
"input_value,expected_value",
[
(None, None),
("", ""),
("foo", "foo"),
("", None),
("foo", Brand(name="foo")),
(Selector(text="<html></html>"), None),
(SelectorList([]), None),
(fromstring("<p>foo</p>"), "foo"),
(fromstring("<img alt='foo'>"), "foo"),
(fromstring("<p><img alt='foo'></p>"), "foo"),
(fromstring("<p><p><img alt='foo'></p></p>"), "foo"),
(Selector(text="<p>foo</p>"), "foo"),
(SelectorList([Selector(text="<p>foo</p>")]), "foo"),
(fromstring("<p>foo</p>"), Brand(name="foo")),
(fromstring("<img alt='foo'>"), Brand(name="foo")),
(fromstring("<p><img alt='foo'></p>"), Brand(name="foo")),
(fromstring("<p><p><img alt='foo'></p></p>"), Brand(name="foo")),
(Selector(text="<p>foo</p>"), Brand(name="foo")),
(SelectorList([Selector(text="<p>foo</p>")]), Brand(name="foo")),
],
)
def test_brand(input_value, expected_value):
@@ -149,7 +159,7 @@ def brand(self):

def test_brand_page():
class MyProductPage(ProductPage):
@field
@field(out=[brand_processor])
def brand(self):
return self.css("body")

@@ -158,7 +168,7 @@ def brand(self):
body="<html><body><img alt='foo'></body></html>".encode(),
)
page = MyProductPage(response=response)
assert page.brand == "foo"
assert page.brand == Brand(name="foo")


@pytest.mark.parametrize(
@@ -321,3 +331,76 @@ def aggregateRating(self):
assert page.aggregateRating == AggregateRating(
ratingValue=3.8, bestRating=10, reviewCount=5
)


@pytest.mark.parametrize(
"input_value,expected_value",
[
(None, None),
([], []),
("https://www.url.com/img.jpg", [Image(url="https://www.url.com/img.jpg")]),
(
[
Image("https://www.url.com/img1.jpg"),
Image("https://www.url.com/img2.jpg"),
],
[
Image("https://www.url.com/img1.jpg"),
Image("https://www.url.com/img2.jpg"),
],
),
(
["https://www.url.com/img1.jpg", "https://www.url.com/img2.jpg"],
[
Image("https://www.url.com/img1.jpg"),
Image("https://www.url.com/img2.jpg"),
],
),
(
[
{"url": "https://www.url.com/img1.jpg"},
{"url": "https://www.url.com/img2.jpg"},
],
[
Image("https://www.url.com/img1.jpg"),
Image("https://www.url.com/img2.jpg"),
],
),
],
)
def test_images(input_value, expected_value):
class ImagesPage(BasePage):
@field(out=[images_processor])
def images(self):
return input_value

page = ImagesPage(base_url) # type: ignore[arg-type]
assert page.images == expected_value


@pytest.mark.parametrize(
"input_value,expected_value",
[
("$10", "10.00"),
("100 ", "100.00"),
("100rub", "100.00"),
(100, "100.00"),
(None, None),
([], []),
({}, {}),
("", None),
("buy 10 ab ab", "10.00"),
("1,000.17", "1000.17"),
("1,000", "1000.00"),
(22.9, "22.90"),
(22.0, "22.00"),
],
)
def test_prices(input_value, expected_value):
class PricePage(BasePage):
@field(out=[price_processor])
def price(self):
return input_value

page = PricePage(base_url) # type: ignore[arg-type]
assert page.price == expected_value
2 changes: 1 addition & 1 deletion tox.ini
@@ -73,7 +73,7 @@ commands = mypy zyte_common_items tests
[testenv:twinecheck]
basepython = python3
deps =
twine==4.0.2
twine==5.1.1
build==0.10.0
commands =
python -m build --sdist
3 changes: 3 additions & 0 deletions zyte_common_items/pages/product.py
@@ -19,6 +19,7 @@
description_html_processor,
description_processor,
gtin_processor,
images_processor,
price_processor,
rating_processor,
simple_price_processor,
@@ -46,6 +47,7 @@ class Processors(BasePage.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
images = [images_processor]


class ProductPage(
@@ -62,6 +64,7 @@ class Processors(Page.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
images = [images_processor]


@attrs.define
108 changes: 90 additions & 18 deletions zyte_common_items/processors.py
@@ -1,5 +1,6 @@
from collections.abc import Iterable
from collections.abc import Iterable, Mapping
from functools import wraps
from numbers import Real
from typing import Any, Callable, List, Optional, Union

from clear_html import clean_node, cleaned_node_to_html, cleaned_node_to_text
@@ -21,8 +22,10 @@
from .components import (
AggregateRating,
BaseMetadata,
Brand,
Breadcrumb,
Gtin,
Image,
ProbabilityRequest,
Request,
)
@@ -104,50 +107,80 @@ def _from_zp_breadcrumb(value: zp_Breadcrumb) -> Breadcrumb:
return results


@only_handle_nodes
def brand_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
def brand_processor(value: Any, page: Any) -> Union[Brand, None]:
"""Convert the data into a brand name if possible.

Supported inputs are :class:`~parsel.selector.Selector`,
:class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
Other inputs are returned as is.
If the input is a :class:`~parsel.selector.Selector`,
:class:`~parsel.selector.SelectorList` or :class:`~lxml.html.HtmlElement`,
the brand name is extracted from it.

If the input is a non-empty string, it is used as the brand name.

Other inputs are returned unchanged.
"""
return extract_brand_name(value, search_depth=2)
value = _handle_selectorlist(value)

if isinstance(value, str):
return Brand(name=value) if value else None

@only_handle_nodes
def price_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
if isinstance(value, (Selector, SelectorList, HtmlElement)):
if brand_name := extract_brand_name(value, search_depth=2):
return Brand(name=brand_name)
else:
return None

return value


def price_processor(value: Any, page: Any) -> Any:
"""Convert the data into a price string if possible.

Uses the price-parser_ library.

Supported inputs are :class:`~parsel.selector.Selector`,
:class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
:class:`~parsel.selector.SelectorList`, :class:`~lxml.html.HtmlElement`, string
instances and numeric values.

Other inputs are returned as is.

Puts the parsed Price object into ``page._parsed_price``.

.. _price-parser: https://github.com/scrapinghub/price-parser
"""
price = extract_price(value)
page._parsed_price = price
return _format_price(price)
value = _handle_selectorlist(value)

if isinstance(value, Real):
return f"{value:.2f}"
elif isinstance(value, (Selector, HtmlElement, str)):
price = extract_price(value)
page._parsed_price = price
return _format_price(price)
else:
return value


@only_handle_nodes
def simple_price_processor(value: Union[Selector, HtmlElement], page: Any) -> Any:
def simple_price_processor(value: Any, page: Any) -> Any:
"""Convert the data into a price string if possible.

Uses the price-parser_ library.

Supported inputs are :class:`~parsel.selector.Selector`,
:class:`~parsel.selector.SelectorList` and :class:`~lxml.html.HtmlElement`.
:class:`~parsel.selector.SelectorList`, :class:`~lxml.html.HtmlElement`, string
instances and numeric values.

Other inputs are returned as is.

.. _price-parser: https://github.com/scrapinghub/price-parser
"""
price = extract_price(value)
return _format_price(price)
value = _handle_selectorlist(value)

if isinstance(value, Real):
return f"{value:.2f}"
Contributor:

It seems this would allow removing the duplication between price_processor and simple_price_processor.

Contributor Author:

We'd like to migrate our stuff quickly before certain deadlines, which is why I want to have this working for now and to avoid coordinating PRs in yet another dependency repository (which is also why I put the proposed parsing of images into a TODO). I think we can improve it later and migrate it to a more fitting place.

Contributor:

Sounds good!

elif isinstance(value, (Selector, HtmlElement, str)):
price = extract_price(value)
return _format_price(price)
else:
return value


@only_handle_nodes
@@ -330,6 +363,45 @@ def aggregateRating(self):
return value


def images_processor(value: Any, page: Any) -> List[Image]:
"""Convert the data into a list of :class:`~zyte_common_items.Image`
objects if possible.

If the input is a string, it is used as the URL of the returned image object.

If the input is an iterable of strings, or of mappings with a "url" key, they
are used to populate the image objects.

Other inputs are returned unchanged.
"""

value = _handle_selectorlist(value)

# TODO: add generic-purpose extract_images utility to zyte-parsers
#
# if isinstance(value, (Selector, HtmlElement)):
# images = extract_images(value)
# return [Image(url=url) for url in images]

if isinstance(value, str):
return [Image(url=value)]

if isinstance(value, Iterable):
results: List[Any] = []
for item in value:
if isinstance(item, Image):
results.append(item)
elif isinstance(item, Mapping):
if url := item.get("url"):
results.append(Image(url=url))
elif isinstance(item, str):
results.append(Image(url=item))

return results

return value


def probability_request_list_processor(
request_list: List[Request],
) -> List[ProbabilityRequest]: