-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy patharticle.py
More file actions
117 lines (86 loc) · 3.12 KB
/
article.py
File metadata and controls
117 lines (86 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from typing import List, Optional
import attrs
from web_poet import Returns
from zyte_common_items.components import Audio, Author, Breadcrumb, Image, Video
from zyte_common_items.fields import auto_field
from zyte_common_items.items import Article, ArticleMetadata
from zyte_common_items.processors import breadcrumbs_processor, string_processor
from .base import BasePage, Page
from .mixins import HasMetadata
class _ArticleProcessors(BasePage.Processors):
    """Processor lists shared by the article page classes of this module.

    Both ``BaseArticlePage.Processors`` and ``ArticlePage.Processors`` derive
    from this class, which keeps the two page flavours in sync. Attribute
    names mirror :class:`Article` field names; any name not listed here falls
    back to whatever ``BasePage.Processors`` declares.

    Every attribute is bound to its own list object; the lists are
    deliberately not shared between fields.
    """

    # Navigation trail.
    breadcrumbs = [breadcrumbs_processor]

    # Textual content.
    headline = [string_processor]
    description = [string_processor]
    articleBody = [string_processor]
    articleBodyHtml = [string_processor]

    # Publication and modification dates, normalized and raw variants.
    datePublished = [string_processor]
    datePublishedRaw = [string_processor]
    dateModified = [string_processor]
    dateModifiedRaw = [string_processor]

    # Language and canonical address.
    inLanguage = [string_processor]
    canonicalUrl = [string_processor]
class BaseArticlePage(BasePage, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`BasePage` subclass for :class:`Article`.

    Declares :class:`Article` as the returned item type and
    :class:`ArticleMetadata` as the metadata type.
    """

    class Processors(_ArticleProcessors):
        """Field processors, taken unchanged from ``_ArticleProcessors``."""
class ArticlePage(Page, Returns[Article], HasMetadata[ArticleMetadata]):
    """:class:`Page` subclass for :class:`Article`.

    Declares :class:`Article` as the returned item type and
    :class:`ArticleMetadata` as the metadata type.
    """

    class Processors(_ArticleProcessors):
        """Field processors, taken unchanged from ``_ArticleProcessors``."""
@attrs.define
class AutoArticlePage(BaseArticlePage):
    """:class:`BaseArticlePage` whose fields mirror an existing :class:`Article`.

    The page holds an :class:`Article` instance in its ``article`` attribute.
    Every field below is declared with ``auto_field`` and returns the
    same-named attribute of that instance without modifying it in the method
    body.
    """

    # Source item that every field reads from.
    article: Article

    # --- Headline and dates -------------------------------------------------

    @auto_field
    def headline(self) -> Optional[str]:
        return self.article.headline

    @auto_field
    def datePublished(self) -> Optional[str]:
        return self.article.datePublished

    @auto_field
    def datePublishedRaw(self) -> Optional[str]:
        return self.article.datePublishedRaw

    @auto_field
    def dateModified(self) -> Optional[str]:
        return self.article.dateModified

    @auto_field
    def dateModifiedRaw(self) -> Optional[str]:
        return self.article.dateModifiedRaw

    # --- Attribution, navigation and language -------------------------------

    @auto_field
    def authors(self) -> Optional[List[Author]]:
        return self.article.authors

    @auto_field
    def breadcrumbs(self) -> Optional[List[Breadcrumb]]:
        return self.article.breadcrumbs

    @auto_field
    def inLanguage(self) -> Optional[str]:
        return self.article.inLanguage

    # --- Images --------------------------------------------------------------

    @auto_field
    def mainImage(self) -> Optional[Image]:
        return self.article.mainImage

    @auto_field
    def images(self) -> Optional[List[Image]]:
        return self.article.images

    # --- Text content ---------------------------------------------------------

    @auto_field
    def description(self) -> Optional[str]:
        return self.article.description

    @auto_field
    def articleBody(self) -> Optional[str]:
        return self.article.articleBody

    @auto_field
    def articleBodyHtml(self) -> Optional[str]:
        return self.article.articleBodyHtml

    # --- Other media ----------------------------------------------------------

    @auto_field
    def videos(self) -> Optional[List[Video]]:
        return self.article.videos

    @auto_field
    def audios(self) -> Optional[List[Audio]]:
        return self.article.audios

    # --- URLs and metadata ----------------------------------------------------

    @auto_field
    def canonicalUrl(self) -> Optional[str]:
        return self.article.canonicalUrl

    @auto_field
    def url(self) -> Optional[str]:
        return self.article.url

    @auto_field
    def metadata(self) -> Optional[ArticleMetadata]:
        return self.article.metadata