Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions tests/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
brand_processor,
breadcrumbs_processor,
gtin_processor,
list_processor,
rating_processor,
string_processor,
)

base_url = "http://www.example.com/blog/"
Expand Down Expand Up @@ -321,3 +323,43 @@ def aggregateRating(self):
assert page.aggregateRating == AggregateRating(
ratingValue=3.8, bestRating=10, reviewCount=5
)


@pytest.mark.parametrize(
    ("input_value", "expected_value"),
    [
        (None, None),
        ("", ""),
        ("Value ", "Value"),
        (" Value", "Value"),
        (" Value ", "Value"),
    ],
)
def test_string_processor(input_value, expected_value):
    """Check that ``string_processor`` strips surrounding whitespace.

    ``None`` and the empty string are expected to come back unchanged.
    """

    class StringPage(BasePage):
        @field(out=[string_processor])
        def name(self):
            return input_value

    # The page is built from the plain ``base_url`` string, hence the ignore.
    instance = StringPage(base_url)  # type: ignore[arg-type]
    assert instance.name == expected_value


@pytest.mark.parametrize(
    ("input_value", "expected_value"),
    [
        (None, None),
        ([], []),
        (["a", "b"], ["a", "b"]),
        ([" a", "b "], ["a", "b"]),
        ([" a ", " b "], ["a", "b"]),
    ],
)
def test_list_processor(input_value, expected_value):
    """Check ``list_processor(string_processor)`` on lists of strings.

    Each element is expected to be stripped; ``None`` and the empty list are
    expected to come back unchanged.
    """

    class ListPage(BasePage):
        @field(out=[list_processor(string_processor)])
        def name(self):
            return input_value

    # The page is built from the plain ``base_url`` string, hence the ignore.
    instance = ListPage(base_url)  # type: ignore[arg-type]
    assert instance.name == expected_value
24 changes: 23 additions & 1 deletion zyte_common_items/pages/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from zyte_common_items.components import Audio, Author, Breadcrumb, Image, Video
from zyte_common_items.fields import auto_field
from zyte_common_items.items import Article, ArticleMetadata
from zyte_common_items.processors import breadcrumbs_processor
from zyte_common_items.processors import breadcrumbs_processor, string_processor

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -17,13 +17,35 @@ class BaseArticlePage(BasePage, Returns[Article], HasMetadata[ArticleMetadata]):

class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Structured field with its dedicated processor.
    breadcrumbs = [breadcrumbs_processor]

    # Plain string fields, in alphabetical order.
    articleBody = [string_processor]
    articleBodyHtml = [string_processor]
    canonicalUrl = [string_processor]
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    description = [string_processor]
    headline = [string_processor]
    inLanguage = [string_processor]
    url = [string_processor]


class ArticlePage(Page, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`Page` subclass for :class:`Article`."""

    class Processors(Page.Processors):
        # Processor lists; each attribute is named after the field it
        # applies to.

        # Structured field with its dedicated processor.
        breadcrumbs = [breadcrumbs_processor]

        # Plain string fields, in alphabetical order.
        articleBody = [string_processor]
        articleBodyHtml = [string_processor]
        canonicalUrl = [string_processor]
        dateModified = [string_processor]
        dateModifiedRaw = [string_processor]
        datePublished = [string_processor]
        datePublishedRaw = [string_processor]
        description = [string_processor]
        headline = [string_processor]
        inLanguage = [string_processor]
        url = [string_processor]


@attrs.define
Expand Down
4 changes: 3 additions & 1 deletion zyte_common_items/pages/article_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from zyte_common_items.components import Breadcrumb
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ArticleFromList, ArticleList, ArticleListMetadata
from zyte_common_items.processors import breadcrumbs_processor
from zyte_common_items.processors import breadcrumbs_processor, string_processor

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -26,6 +26,8 @@ class ArticleListPage(Page, Returns[ArticleList], HasMetadata[ArticleListMetadat

class Processors(Page.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Structured field with its dedicated processor.
    breadcrumbs = [breadcrumbs_processor]

    # Plain string fields.
    canonicalUrl = [string_processor]
    url = [string_processor]


@attrs.define
Expand Down
11 changes: 11 additions & 0 deletions zyte_common_items/pages/article_navigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from zyte_common_items.components import ProbabilityRequest, Request
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ArticleNavigation, ArticleNavigationMetadata
from zyte_common_items.processors import string_processor

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -16,12 +17,22 @@ class BaseArticleNavigationPage(
):
""":class:`BasePage` subclass for :class:`ArticleNavigation`."""

class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.
    categoryName = [string_processor]
    # NOTE(review): pageNumber sounds numeric; confirm string_processor is
    # the intended processor for it.
    pageNumber = [string_processor]
    url = [string_processor]


class ArticleNavigationPage(
    Page,
    Returns[ArticleNavigation],
    HasMetadata[ArticleNavigationMetadata],
):
    """:class:`Page` subclass for :class:`ArticleNavigation`."""

    class Processors(Page.Processors):
        # Processor lists; each attribute is named after the field it
        # applies to.
        categoryName = [string_processor]
        # NOTE(review): pageNumber sounds numeric; confirm string_processor
        # is the intended processor for it.
        pageNumber = [string_processor]
        url = [string_processor]


@attrs.define
class AutoArticleNavigationPage(BaseArticleNavigationPage):
Expand Down
6 changes: 3 additions & 3 deletions zyte_common_items/pages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from web_poet.pages import ItemT

from .._dateutils import utcnow_formatted
from ..processors import metadata_processor
from ..processors import metadata_processor, string_processor
from .mixins import HasMetadata, MetadataT


Expand Down Expand Up @@ -49,7 +49,7 @@ class BasePage(_BasePage):
:class:`~web_poet.page_inputs.http.RequestUrl` as a dependency."""

class Processors(_BasePage.Processors):
    # Default processor list for the ``url`` field.
    url = [string_processor]

request_url: RequestUrl

Expand All @@ -64,7 +64,7 @@ class Page(_BasePage, WebPage):
:class:`~web_poet.page_inputs.http.HttpResponse` as a dependency."""

class Processors(_BasePage.Processors):
    # Default processor list for the ``url`` field.
    url = [string_processor]

@field
def url(self) -> str:
Expand Down
29 changes: 28 additions & 1 deletion zyte_common_items/pages/business_place.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
)
from zyte_common_items.fields import auto_field
from zyte_common_items.items import BusinessPlace, BusinessPlaceMetadata
from zyte_common_items.processors import description_processor, rating_processor
from zyte_common_items.processors import (
description_processor,
list_processor,
rating_processor,
string_processor,
)

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -30,6 +35,17 @@ class BaseBusinessPlacePage(
class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Fields with a dedicated processor.
    aggregateRating = [rating_processor]
    description = [description_processor]

    # List fields: string_processor wrapped in list_processor.
    categories = [list_processor(string_processor)]
    features = [list_processor(string_processor)]
    tags = [list_processor(string_processor)]

    # Plain string fields, in alphabetical order.
    map = [string_processor]
    name = [string_processor]
    placeId = [string_processor]
    priceRange = [string_processor]
    telephone = [string_processor]
    timezone = [string_processor]
    url = [string_processor]
    website = [string_processor]


class BusinessPlacePage(
Expand All @@ -40,6 +56,17 @@ class BusinessPlacePage(
class Processors(Page.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Fields with a dedicated processor.
    aggregateRating = [rating_processor]
    description = [description_processor]

    # List fields: string_processor wrapped in list_processor.
    categories = [list_processor(string_processor)]
    features = [list_processor(string_processor)]
    tags = [list_processor(string_processor)]

    # Plain string fields, in alphabetical order.
    map = [string_processor]
    name = [string_processor]
    placeId = [string_processor]
    priceRange = [string_processor]
    telephone = [string_processor]
    timezone = [string_processor]
    url = [string_processor]
    website = [string_processor]


@attrs.define
Expand Down
32 changes: 32 additions & 0 deletions zyte_common_items/pages/job_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from zyte_common_items.processors import (
description_html_processor,
description_processor,
list_processor,
string_processor,
)

from .base import BasePage, Page
Expand All @@ -23,6 +25,21 @@ class BaseJobPostingPage(
class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Fields with a dedicated processor.
    description = [description_processor]
    descriptionHtml = [description_html_processor]

    # List field: string_processor wrapped in list_processor.
    requirements = [list_processor(string_processor)]

    # Plain string fields, in alphabetical order.
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    employmentType = [string_processor]
    headline = [string_processor]
    jobPostingId = [string_processor]
    jobStartDate = [string_processor]
    jobStartDateRaw = [string_processor]
    jobTitle = [string_processor]
    remoteStatus = [string_processor]
    url = [string_processor]
    validThrough = [string_processor]
    validThroughRaw = [string_processor]


class JobPostingPage(
Expand All @@ -33,6 +50,21 @@ class JobPostingPage(
class Processors(Page.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Fields with a dedicated processor.
    description = [description_processor]
    descriptionHtml = [description_html_processor]

    # List field: string_processor wrapped in list_processor.
    requirements = [list_processor(string_processor)]

    # Plain string fields, in alphabetical order.
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    employmentType = [string_processor]
    headline = [string_processor]
    jobPostingId = [string_processor]
    jobStartDate = [string_processor]
    jobStartDateRaw = [string_processor]
    jobTitle = [string_processor]
    remoteStatus = [string_processor]
    url = [string_processor]
    validThrough = [string_processor]
    validThroughRaw = [string_processor]


@attrs.define
Expand Down
28 changes: 28 additions & 0 deletions zyte_common_items/pages/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
description_html_processor,
description_processor,
gtin_processor,
list_processor,
price_processor,
rating_processor,
simple_price_processor,
string_processor,
)

from .base import BasePage, Page
Expand All @@ -46,6 +48,19 @@ class Processors(BasePage.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
availability = [string_processor]
canonicalUrl = [string_processor]
color = [string_processor]
currency = [string_processor]
currencyRaw = [string_processor]
features = [list_processor(string_processor)]
mpn = [string_processor]
name = [string_processor]
productId = [string_processor]
size = [string_processor]
sku = [string_processor]
style = [string_processor]
url = [string_processor]


class ProductPage(
Expand All @@ -62,6 +77,19 @@ class Processors(Page.Processors):
gtin = [gtin_processor]
price = [price_processor]
regularPrice = [simple_price_processor]
availability = [string_processor]
canonicalUrl = [string_processor]
color = [string_processor]
currency = [string_processor]
currencyRaw = [string_processor]
features = [list_processor(string_processor)]
mpn = [string_processor]
name = [string_processor]
productId = [string_processor]
size = [string_processor]
sku = [string_processor]
style = [string_processor]
url = [string_processor]


@attrs.define
Expand Down
8 changes: 7 additions & 1 deletion zyte_common_items/pages/product_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from zyte_common_items.components import Breadcrumb, Link
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ProductFromList, ProductList, ProductListMetadata
from zyte_common_items.processors import breadcrumbs_processor
from zyte_common_items.processors import breadcrumbs_processor, string_processor

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -19,13 +19,19 @@ class BaseProductListPage(

class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Structured field with its dedicated processor.
    breadcrumbs = [breadcrumbs_processor]

    # Plain string fields.
    canonicalUrl = [string_processor]
    categoryName = [string_processor]
    url = [string_processor]


class ProductListPage(Page, Returns[ProductList], HasMetadata[ProductListMetadata]):
    """:class:`Page` subclass for :class:`ProductList`."""

    class Processors(Page.Processors):
        # Processor lists; each attribute is named after the field it
        # applies to.

        # Structured field with its dedicated processor.
        breadcrumbs = [breadcrumbs_processor]

        # Plain string fields.
        canonicalUrl = [string_processor]
        categoryName = [string_processor]
        url = [string_processor]


@attrs.define
Expand Down
13 changes: 12 additions & 1 deletion zyte_common_items/pages/product_navigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from zyte_common_items.components import ProbabilityRequest, Request
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ProductNavigation, ProductNavigationMetadata
from zyte_common_items.processors import probability_request_list_processor
from zyte_common_items.processors import (
probability_request_list_processor,
string_processor,
)

from .base import BasePage, Page
from .mixins import HasMetadata
Expand All @@ -20,13 +23,21 @@ class BaseProductNavigationPage(
class Processors(BasePage.Processors):
    # Processor lists; each attribute is named after the field it applies to.

    # Request-list fields share the probability-request list processor.
    items = [probability_request_list_processor]
    subCategories = [probability_request_list_processor]

    # Plain string fields.
    categoryName = [string_processor]
    url = [string_processor]


class ProductNavigationPage(
    Page, Returns[ProductNavigation], HasMetadata[ProductNavigationMetadata]
):
    """:class:`Page` subclass for :class:`ProductNavigation`."""

    # This page derives from Page, so its processors extend Page.Processors
    # (not BasePage.Processors); otherwise any processor later added to
    # Page.Processors would silently be missed here.
    class Processors(Page.Processors):
        # Request-list fields share the probability-request list processor.
        subCategories = [probability_request_list_processor]
        items = [probability_request_list_processor]

        # Plain string fields.
        categoryName = [string_processor]
        url = [string_processor]


@attrs.define
class AutoProductNavigationPage(BaseProductNavigationPage):
Expand Down
Loading