diff --git a/tests/test_processors.py b/tests/test_processors.py index e1a9a315..4ba280f9 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -313,6 +313,30 @@ def gtin(self): }, AggregateRating(ratingValue=3.8, bestRating=10.0, reviewCount=3), ), + ( + { + "ratingValue": "2.12", + "bestRating": "5.0", + "reviewCount": "12", + }, + AggregateRating(ratingValue=2.12, bestRating=5.0, reviewCount=12), + ), + ( + { + "ratingValue": "2,12", + "bestRating": "5,0", + "reviewCount": "12", + }, + AggregateRating(ratingValue=2.12, bestRating=5.0, reviewCount=12), + ), + ( + { + "ratingValue": "2.12", + "bestRating": "5.0", + "reviewCount": "12,123", + }, + AggregateRating(ratingValue=2.12, bestRating=5.0, reviewCount=12123), + ), ], ) def test_rating(input_value, expected_value): diff --git a/zyte_common_items/processors.py b/zyte_common_items/processors.py index ca36d389..824c3116 100644 --- a/zyte_common_items/processors.py +++ b/zyte_common_items/processors.py @@ -1,6 +1,6 @@ from collections.abc import Iterable, Mapping from functools import wraps -from numbers import Real +from numbers import Integral, Real from typing import Any, Callable, List, Optional, Union from clear_html import clean_node, cleaned_node_to_html, cleaned_node_to_text @@ -31,6 +31,26 @@ ) +def _to_int(value: Any) -> Any: + if isinstance(value, Integral): + return int(value) + elif isinstance(value, str): + if "," in value: + value = value.replace(",", "") + return int(value) + return value + + +def _to_float(value: Any) -> Any: + if isinstance(value, Real): + return float(value) + elif isinstance(value, str): + if "," in value: + value = value.replace(",", ".") + return float(value) + return value + + def _get_base_url(page: Any) -> Optional[str]: if isinstance(page, ResponseShortcutsMixin): return page.base_url @@ -292,7 +312,7 @@ def rating_processor(value: Any, page: Any) -> Any: The input can also be a dictionary with one or more of the :class:`~zyte_common_items.AggregateRating` fields as keys. The values for those keys can be either final values, to be assigned to the corresponding - fields, or selector-like objects. + fields, strings to be parsed, or selector-like objects. If a returning dictionary is missing the ``bestRating`` field and ``ratingValue`` is a selector-like object, ``bestRating`` may be extracted. @@ -343,7 +363,7 @@ def aggregateRating(self): if isinstance(review_count, (Selector, HtmlElement)): result.reviewCount = extract_review_count(review_count) elif review_count is not None: - result.reviewCount = int(review_count) + result.reviewCount = _to_int(review_count) rating_value = _handle_selectorlist(value.get("ratingValue")) if isinstance(rating_value, (Selector, HtmlElement)): @@ -351,10 +371,10 @@ def aggregateRating(self): result.ratingValue = zp_rating.ratingValue result.bestRating = zp_rating.bestRating elif rating_value is not None: - result.ratingValue = float(rating_value) + result.ratingValue = _to_float(rating_value) if (best_rating := value.get("bestRating")) is not None: - result.bestRating = float(best_rating) + result.bestRating = _to_float(best_rating) if result.reviewCount or result.bestRating or result.ratingValue: return result