Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions tests/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
brand_processor,
breadcrumbs_processor,
gtin_processor,
list_processor,
rating_processor,
string_processor,
)

base_url = "http://www.example.com/blog/"
Expand Down Expand Up @@ -321,3 +323,43 @@ def aggregateRating(self):
assert page.aggregateRating == AggregateRating(
ratingValue=3.8, bestRating=10, reviewCount=5
)


@pytest.mark.parametrize(
    ("input_value", "expected_value"),
    [
        (None, None),
        ("", ""),
        ("Value ", "Value"),
        (" Value", "Value"),
        (" Value ", "Value"),
    ],
)
def test_string_processor(input_value, expected_value):
    """Check ``string_processor`` when used as a field output processor.

    ``None`` and the empty string are expected to pass through unchanged;
    leading and trailing whitespace is expected to be removed.
    """

    class StringPage(BasePage):
        # The field simply echoes the parametrized input so that only the
        # processor determines the final value.
        @field(out=[string_processor])
        def name(self):
            return input_value

    page_object = StringPage(base_url)  # type: ignore[arg-type]
    assert page_object.name == expected_value


@pytest.mark.parametrize(
    ("input_value", "expected_value"),
    [
        (None, None),
        ([], []),
        (["a", "b"], ["a", "b"]),
        ([" a", "b "], ["a", "b"]),
        ([" a ", " b "], ["a", "b"]),
    ],
)
def test_list_processor(input_value, expected_value):
    """Check ``list_processor(string_processor)`` as a field output processor.

    ``None`` and the empty list are expected to pass through unchanged; each
    string item is expected to have its surrounding whitespace removed.
    """

    class ListPage(BasePage):
        # The field simply echoes the parametrized input so that only the
        # processor determines the final value.
        @field(out=[list_processor(string_processor)])
        def name(self):
            return input_value

    page_object = ListPage(base_url)  # type: ignore[arg-type]
    assert page_object.name == expected_value
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ commands = mypy zyte_common_items tests
[testenv:twinecheck]
basepython = python3
deps =
twine==4.0.2
twine==5.1.1
build==0.10.0
commands =
python -m build --sdist
Expand Down
24 changes: 19 additions & 5 deletions zyte_common_items/pages/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,38 @@
from zyte_common_items.components import Audio, Author, Breadcrumb, Image, Video
from zyte_common_items.fields import auto_field
from zyte_common_items.items import Article, ArticleMetadata
from zyte_common_items.processors import breadcrumbs_processor
from zyte_common_items.processors import breadcrumbs_processor, string_processor

from .base import BasePage, Page
from .mixins import HasMetadata


class _ArticleProcessors(BasePage.Processors):
    """Field processors shared by :class:`BaseArticlePage` and
    :class:`ArticlePage`."""

    # Field with a dedicated processor.
    breadcrumbs = [breadcrumbs_processor]

    # Text fields, each passed through string_processor.
    articleBody = [string_processor]
    articleBodyHtml = [string_processor]
    canonicalUrl = [string_processor]
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    description = [string_processor]
    headline = [string_processor]
    inLanguage = [string_processor]


class BaseArticlePage(BasePage, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`BasePage` subclass for :class:`Article`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_ArticleProcessors):
        """Field processors; all of them come from ``_ArticleProcessors``."""


class ArticlePage(Page, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`Page` subclass for :class:`Article`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_ArticleProcessors):
        """Field processors; all of them come from ``_ArticleProcessors``."""


@attrs.define
Expand Down
15 changes: 10 additions & 5 deletions zyte_common_items/pages/article_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,31 @@
from zyte_common_items.components import Breadcrumb
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ArticleFromList, ArticleList, ArticleListMetadata
from zyte_common_items.processors import breadcrumbs_processor
from zyte_common_items.processors import breadcrumbs_processor, string_processor

from .base import BasePage, Page
from .mixins import HasMetadata


class _ArticleListProcessors(BasePage.Processors):
    """Field processors shared by :class:`BaseArticleListPage` and
    :class:`ArticleListPage`."""

    # Text field passed through string_processor.
    canonicalUrl = [string_processor]

    # Field with a dedicated processor.
    breadcrumbs = [breadcrumbs_processor]


class BaseArticleListPage(
    BasePage, Returns[ArticleList], HasMetadata[ArticleListMetadata]
):
    """:class:`BasePage` subclass for :class:`ArticleList`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_ArticleListProcessors):
        """Field processors; all of them come from ``_ArticleListProcessors``."""


class ArticleListPage(Page, Returns[ArticleList], HasMetadata[ArticleListMetadata]):
    """:class:`Page` subclass for :class:`ArticleList`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_ArticleListProcessors):
        """Field processors; all of them come from ``_ArticleListProcessors``."""


@attrs.define
Expand Down
11 changes: 11 additions & 0 deletions zyte_common_items/pages/article_navigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,33 @@
from zyte_common_items.components import ProbabilityRequest, Request
from zyte_common_items.fields import auto_field
from zyte_common_items.items import ArticleNavigation, ArticleNavigationMetadata
from zyte_common_items.processors import string_processor

from .base import BasePage, Page
from .mixins import HasMetadata


class _ArticleNavigationProcessors(BasePage.Processors):
    """Field processors shared by :class:`BaseArticleNavigationPage` and
    :class:`ArticleNavigationPage`."""

    # Text field passed through string_processor.
    categoryName = [string_processor]


class BaseArticleNavigationPage(
    BasePage, Returns[ArticleNavigation], HasMetadata[ArticleNavigationMetadata]
):
    """:class:`BasePage` subclass for :class:`ArticleNavigation`."""

    class Processors(_ArticleNavigationProcessors):
        """Field processors; all of them come from
        ``_ArticleNavigationProcessors``."""


class ArticleNavigationPage(
    Page, Returns[ArticleNavigation], HasMetadata[ArticleNavigationMetadata]
):
    """:class:`Page` subclass for :class:`ArticleNavigation`."""

    class Processors(_ArticleNavigationProcessors):
        """Field processors; all of them come from
        ``_ArticleNavigationProcessors``."""


@attrs.define
class AutoArticleNavigationPage(BaseArticleNavigationPage):
Expand Down
10 changes: 7 additions & 3 deletions zyte_common_items/pages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from web_poet.pages import ItemT

from .._dateutils import utcnow_formatted
from ..processors import metadata_processor
from ..processors import metadata_processor, string_processor
from .mixins import HasMetadata, MetadataT


Expand Down Expand Up @@ -43,12 +43,16 @@ def no_item_found(self) -> ItemT:
)


class _BaseProcessors(_BasePage.Processors):
    """Field processors shared by :class:`BasePage` and :class:`Page`."""

    # Text field passed through string_processor.
    url = [string_processor]


@attrs.define
class BasePage(_BasePage):
"""Base class for page object classes that has
:class:`~web_poet.page_inputs.http.RequestUrl` as a dependency."""

class Processors(_BasePage.Processors):
class Processors(_BaseProcessors):
pass

request_url: RequestUrl
Expand All @@ -63,7 +67,7 @@ class Page(_BasePage, WebPage):
"""Base class for page object classes that has
:class:`~web_poet.page_inputs.http.HttpResponse` as a dependency."""

class Processors(_BasePage.Processors):
class Processors(_BaseProcessors):
pass

@field
Expand Down
32 changes: 25 additions & 7 deletions zyte_common_items/pages/business_place.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,48 @@
)
from zyte_common_items.fields import auto_field
from zyte_common_items.items import BusinessPlace, BusinessPlaceMetadata
from zyte_common_items.processors import description_processor, rating_processor
from zyte_common_items.processors import (
description_processor,
list_processor,
rating_processor,
string_processor,
)

from .base import BasePage, Page
from .mixins import HasMetadata


class _BusinessPlaceProcessors(BasePage.Processors):
    """Field processors shared by :class:`BaseBusinessPlacePage` and
    :class:`BusinessPlacePage`."""

    # Fields with a dedicated processor.
    aggregateRating = [rating_processor]
    description = [description_processor]

    # List fields: string_processor is applied through list_processor.
    categories = [list_processor(string_processor)]
    features = [list_processor(string_processor)]
    tags = [list_processor(string_processor)]

    # Text fields, each passed through string_processor.
    map = [string_processor]
    name = [string_processor]
    placeId = [string_processor]
    priceRange = [string_processor]
    telephone = [string_processor]
    timezone = [string_processor]
    website = [string_processor]


class BaseBusinessPlacePage(
    BasePage, Returns[BusinessPlace], HasMetadata[BusinessPlaceMetadata]
):
    """:class:`BasePage` subclass for :class:`BusinessPlace`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_BusinessPlaceProcessors):
        """Field processors; all of them come from
        ``_BusinessPlaceProcessors``."""


class BusinessPlacePage(
    Page, Returns[BusinessPlace], HasMetadata[BusinessPlaceMetadata]
):
    """:class:`Page` subclass for :class:`BusinessPlace`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_BusinessPlaceProcessors):
        """Field processors; all of them come from
        ``_BusinessPlaceProcessors``."""


@attrs.define
Expand Down
31 changes: 25 additions & 6 deletions zyte_common_items/pages/job_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,49 @@
from zyte_common_items.processors import (
description_html_processor,
description_processor,
list_processor,
string_processor,
)

from .base import BasePage, Page
from .mixins import DescriptionMixin, HasMetadata


class _JobPostingProcessors(BasePage.Processors):
    """Field processors shared by :class:`BaseJobPostingPage` and
    :class:`JobPostingPage`."""

    # Fields with a dedicated processor.
    description = [description_processor]
    descriptionHtml = [description_html_processor]

    # List field: string_processor is applied through list_processor.
    requirements = [list_processor(string_processor)]

    # Text fields, each passed through string_processor.
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    employmentType = [string_processor]
    headline = [string_processor]
    jobPostingId = [string_processor]
    jobStartDate = [string_processor]
    jobStartDateRaw = [string_processor]
    jobTitle = [string_processor]
    remoteStatus = [string_processor]
    validThrough = [string_processor]
    validThroughRaw = [string_processor]


class BaseJobPostingPage(
    BasePage, DescriptionMixin, Returns[JobPosting], HasMetadata[JobPostingMetadata]
):
    """:class:`BasePage` subclass for :class:`JobPosting`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_JobPostingProcessors):
        """Field processors; all of them come from ``_JobPostingProcessors``."""


class JobPostingPage(
    Page, DescriptionMixin, Returns[JobPosting], HasMetadata[JobPostingMetadata]
):
    """:class:`Page` subclass for :class:`JobPosting`."""

    # Only one nested ``Processors`` class is kept: an earlier definition
    # would be rebound (shadowed) by this one and therefore never used.
    class Processors(_JobPostingProcessors):
        """Field processors; all of them come from ``_JobPostingProcessors``."""


@attrs.define
Expand Down
Loading