-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy patharticle.py
More file actions
125 lines (96 loc) · 3.61 KB
/
article.py
File metadata and controls
125 lines (96 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from typing import List, Optional
import attrs
from web_poet import Returns
from zyte_common_items.components import Audio, Author, Breadcrumb, Image, Video
from zyte_common_items.fields import auto_field
from zyte_common_items.items import Article, ArticleMetadata
from zyte_common_items.processors import breadcrumbs_processor, string_processor
from .base import BasePage, Page
from .mixins import HasMetadata
class BaseArticlePage(BasePage, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`BasePage`-derived page object that returns an :class:`Article`."""

    class Processors(BasePage.Processors):
        # Each attribute is named after an Article field and holds the list
        # of processors registered for that field. Anything not listed here
        # falls back to what BasePage.Processors defines.

        # Navigation trail.
        breadcrumbs = [breadcrumbs_processor]

        # Title, summary and body text.
        headline = [string_processor]
        description = [string_processor]
        articleBody = [string_processor]
        articleBodyHtml = [string_processor]

        # Publication and modification dates (normalised and raw variants).
        datePublished = [string_processor]
        datePublishedRaw = [string_processor]
        dateModified = [string_processor]
        dateModifiedRaw = [string_processor]

        # Language and page addresses.
        inLanguage = [string_processor]
        canonicalUrl = [string_processor]
        url = [string_processor]
class ArticlePage(Page, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`Page`-derived page object that returns an :class:`Article`."""

    class Processors(Page.Processors):
        # Each attribute is named after an Article field and holds the list
        # of processors registered for that field. Anything not listed here
        # falls back to what Page.Processors defines.

        # Navigation trail.
        breadcrumbs = [breadcrumbs_processor]

        # Title, summary and body text.
        headline = [string_processor]
        description = [string_processor]
        articleBody = [string_processor]
        articleBodyHtml = [string_processor]

        # Publication and modification dates (normalised and raw variants).
        datePublished = [string_processor]
        datePublishedRaw = [string_processor]
        dateModified = [string_processor]
        dateModifiedRaw = [string_processor]

        # Language and page addresses.
        inLanguage = [string_processor]
        canonicalUrl = [string_processor]
        url = [string_processor]
@attrs.define
class AutoArticlePage(BaseArticlePage):
    """:class:`BaseArticlePage` whose fields are read from :attr:`article`.

    Every ``auto_field`` below returns the attribute of the same name from
    the wrapped :class:`Article` instance, unchanged.
    """

    # Source item that all fields below delegate to.
    article: Article

    # --- Headline and dates -------------------------------------------------

    @auto_field
    def headline(self) -> Optional[str]:
        return self.article.headline

    @auto_field
    def datePublished(self) -> Optional[str]:
        return self.article.datePublished

    @auto_field
    def datePublishedRaw(self) -> Optional[str]:
        return self.article.datePublishedRaw

    @auto_field
    def dateModified(self) -> Optional[str]:
        return self.article.dateModified

    @auto_field
    def dateModifiedRaw(self) -> Optional[str]:
        return self.article.dateModifiedRaw

    # --- Attribution, navigation and language -------------------------------

    @auto_field
    def authors(self) -> Optional[List[Author]]:
        return self.article.authors

    @auto_field
    def breadcrumbs(self) -> Optional[List[Breadcrumb]]:
        return self.article.breadcrumbs

    @auto_field
    def inLanguage(self) -> Optional[str]:
        return self.article.inLanguage

    # --- Images -------------------------------------------------------------

    @auto_field
    def mainImage(self) -> Optional[Image]:
        return self.article.mainImage

    @auto_field
    def images(self) -> Optional[List[Image]]:
        return self.article.images

    # --- Summary and body ---------------------------------------------------

    @auto_field
    def description(self) -> Optional[str]:
        return self.article.description

    @auto_field
    def articleBody(self) -> Optional[str]:
        return self.article.articleBody

    @auto_field
    def articleBodyHtml(self) -> Optional[str]:
        return self.article.articleBodyHtml

    # --- Other media --------------------------------------------------------

    @auto_field
    def videos(self) -> Optional[List[Video]]:
        return self.article.videos

    @auto_field
    def audios(self) -> Optional[List[Audio]]:
        return self.article.audios

    # --- URLs and metadata --------------------------------------------------

    @auto_field
    def canonicalUrl(self) -> Optional[str]:
        return self.article.canonicalUrl

    @auto_field
    def url(self) -> Optional[str]:
        return self.article.url

    @auto_field
    def metadata(self) -> Optional[ArticleMetadata]:
        return self.article.metadata