-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathbase.py
More file actions
75 lines (58 loc) · 2.31 KB
/
base.py
File metadata and controls
75 lines (58 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import attrs
from web_poet import ItemPage, RequestUrl, WebPage, field
from web_poet.pages import ItemT
from .._dateutils import utcnow_formatted
from ..processors import metadata_processor, string_processor
from .mixins import HasMetadata, MetadataT
class _BasePage(ItemPage[ItemT], HasMetadata[MetadataT]):
    """Shared base for page objects that produce an item carrying metadata.

    Provides a default ``metadata`` field and the :meth:`no_item_found`
    helper. Both build a fresh instance of ``self.metadata_cls``
    (presumably supplied through ``HasMetadata`` -- confirm in ``mixins``).
    """

    class Processors:
        metadata = [metadata_processor]

    def _build_metadata(self, probability: float) -> MetadataT:
        """Create a metadata object pre-filled with the download date and
        the given *probability*.

        Only attributes that the configured metadata class actually exposes
        (as reported by :func:`dir`) are assigned, so metadata classes
        lacking ``dateDownloaded`` or ``probability`` are left untouched.

        :raises ValueError: if ``self.metadata_cls`` is ``None``.
        """
        metadata_cls = self.metadata_cls
        if metadata_cls is None:
            raise ValueError(
                f"{type(self)} doesn't have a metadata class configured."
            )
        value = metadata_cls()
        attributes = dir(value)
        if "dateDownloaded" in attributes:
            value.dateDownloaded = utcnow_formatted()  # type: ignore
        if "probability" in attributes:
            value.probability = probability  # type: ignore
        return value

    @field
    def metadata(self) -> MetadataT:
        """Return default metadata for a successfully extracted item.

        ``probability`` is set to ``1.0`` and ``dateDownloaded`` to the
        current formatted UTC time, when the metadata class has those
        attributes.

        :raises ValueError: if no metadata class is configured.
        """
        return self._build_metadata(probability=1.0)

    def no_item_found(self) -> ItemT:
        """Return an item with the current url and probability=0,
        indicating that the passed URL doesn't contain the expected item.

        Use it in your .validate_input implementation.

        :raises ValueError: if no metadata class is configured.
        """
        metadata = self._build_metadata(probability=0.0)
        return self.item_cls(  # type: ignore
            url=self.url,  # type: ignore[attr-defined]
            metadata=metadata,
        )
class _BaseProcessors(_BasePage.Processors):
    """Processor declarations shared by the concrete base page classes.

    Extends ``_BasePage.Processors`` with a ``url`` entry that runs
    ``string_processor``.
    """

    url = [string_processor]
@attrs.define
class BasePage(_BasePage):
    """Base class for page object classes that have
    :class:`~web_poet.page_inputs.http.RequestUrl` as a dependency.

    The ``url`` field is derived from the injected ``request_url``.
    """

    class Processors(_BaseProcessors):
        """Processors for this page; nothing is added to the shared set."""

    # Declared as an attrs attribute of this class.
    request_url: RequestUrl

    @field
    def url(self) -> str:
        """Return the request URL converted to ``str``."""
        request_url = self.request_url
        return str(request_url)
@attrs.define
class Page(_BasePage, WebPage):
    """Base class for page object classes that have
    :class:`~web_poet.page_inputs.http.HttpResponse` as a dependency.

    The ``url`` field is derived from the URL of ``self.response``.
    """

    class Processors(_BaseProcessors):
        """Processors for this page; nothing is added to the shared set."""

    @field
    def url(self) -> str:
        """Return the response URL converted to ``str``."""
        response_url = self.response.url
        return str(response_url)