Skip to content

Commit dd75140

Browse files
authored
Merge pull request #92 from ogajduse/feat/address-gh-comments-0.2.0
2 parents 715c6d7 + 2c786ca commit dd75140

15 files changed

+827
-41
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -2,3 +2,4 @@
22
*.pyc
33
build/
44
*.egg-info/
5+
custom_components/hacs

.pre-commit-config.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ repos:
2424
- id: mypy
2525
additional_dependencies:
2626
[
27-
homeassistant-stubs,
27+
homeassistant-stubs==2023.8.1,
2828
voluptuous-stubs,
2929
types-python-dateutil,
3030
types-PyYAML,

custom_components/feedparser/sensor.py

+57-27
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
import email.utils
55
import logging
6+
import re
67
from datetime import datetime, timedelta, timezone
78
from typing import TYPE_CHECKING
89

@@ -173,11 +174,19 @@ def _generate_sensor_entry(
173174
if key in ["published", "updated", "created", "expired"]:
174175
parsed_date: datetime = self._parse_date(value)
175176
sensor_entry[key] = parsed_date.strftime(self._date_format)
177+
elif key == "image":
178+
sensor_entry["image"] = value.get("href")
176179
else:
177180
sensor_entry[key] = value
178181

179-
self._process_image(feed_entry, sensor_entry)
180-
182+
if "image" in self._inclusions and "image" not in sensor_entry:
183+
sensor_entry["image"] = self._process_image(feed_entry)
184+
if (
185+
"link" in self._inclusions
186+
and "link" not in sensor_entry
187+
and (processed_link := self._process_link(feed_entry))
188+
):
189+
sensor_entry["link"] = processed_link
181190
_LOGGER.debug("Feed %s: Generated sensor entry: %s", self.name, sensor_entry)
182191
return sensor_entry
183192

@@ -194,42 +203,63 @@ def _parse_date(self: FeedParserSensor, date: str) -> datetime:
194203
self.name,
195204
date,
196205
)
206+
# best effort to parse the date using dateutil
207+
parsed_time = parser.parse(date)
208+
209+
if not parsed_time.tzinfo:
210+
# best effort to parse the date using dateutil
197211
parsed_time = parser.parse(date)
198-
if not parsed_time.tzname():
199-
# replace tzinfo with UTC offset if tzinfo does not contain a TZ name
200-
parsed_time = parsed_time.replace(
201-
tzinfo=timezone(parsed_time.utcoffset()), # type: ignore[arg-type]
212+
if not parsed_time.tzinfo:
213+
msg = (
214+
f"Feed {self.name}: Unable to parse date {date}, "
215+
"caused by an incorrect date format"
202216
)
217+
raise ValueError(msg)
218+
if not parsed_time.tzname():
219+
# replace tzinfo with UTC offset if tzinfo does not contain a TZ name
220+
parsed_time = parsed_time.replace(
221+
tzinfo=timezone(parsed_time.utcoffset()), # type: ignore[arg-type]
222+
)
223+
203224
if self._local_time:
204225
parsed_time = dt.as_local(parsed_time)
205226
_LOGGER.debug("Feed %s: Parsed date: %s", self.name, parsed_time)
206227
return parsed_time
207228

208-
def _process_image(
209-
self: FeedParserSensor,
210-
feed_entry: FeedParserDict,
211-
sensor_entry: dict[str, str],
212-
) -> None:
213-
if "image" in self._inclusions and "image" not in sensor_entry.keys():
214-
if "enclosures" in feed_entry:
215-
images = [
216-
enc
217-
for enc in feed_entry["enclosures"]
218-
if enc.type.startswith("image/")
219-
]
220-
else:
221-
images = []
229+
def _process_image(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
230+
if "enclosures" in feed_entry and feed_entry["enclosures"]:
231+
images = [
232+
enc for enc in feed_entry["enclosures"] if enc.type.startswith("image/")
233+
]
222234
if images:
223-
sensor_entry["image"] = images[0]["href"] # pick the first image found
224-
else:
225-
_LOGGER.debug(
226-
"Feed %s: Image is in inclusions, but no image was found for %s",
235+
# pick the first image found
236+
return images[0]["href"]
237+
elif "summary" in feed_entry:
238+
images = re.findall(
239+
r"<img.+?src=\"(.+?)\".+?>",
240+
feed_entry["summary"],
241+
)
242+
if images:
243+
# pick the first image found
244+
return images[0]
245+
_LOGGER.debug(
246+
"Feed %s: Image is in inclusions, but no image was found for %s",
247+
self.name,
248+
feed_entry,
249+
)
250+
return DEFAULT_THUMBNAIL # use default image if no image found
251+
252+
def _process_link(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
253+
"""Return link from feed entry."""
254+
if "links" in feed_entry:
255+
if len(feed_entry["links"]) > 1:
256+
_LOGGER.warning(
257+
"Feed %s: More than one link found for %s. Using the first link.",
227258
self.name,
228259
feed_entry,
229260
)
230-
sensor_entry[
231-
"image"
232-
] = DEFAULT_THUMBNAIL # use default image if no image found
261+
return feed_entry["links"][0]["href"]
262+
return ""
233263

234264
@property
235265
def feed_entries(self: FeedParserSensor) -> list[dict[str, str]]:

pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ dependencies = ["python-dateutil", "feedparser==6.0.10", "homeassistant"]
2828
[project.optional-dependencies]
2929
dev = [
3030
"black",
31-
"homeassistant-stubs",
31+
"homeassistant-stubs==2023.8.1",
3232
"pytest==7.4.0",
3333
"mypy",
3434
"ruff",

tests/constants.py

+37
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,43 @@
5151
"show_topn": 1,
5252
},
5353
},
54+
{
55+
"has_images": True,
56+
"all_entries_have_images": False,
57+
"has_unique_links": False,
58+
"sensor_config": {
59+
"name": "buienradar_nl",
60+
"feed_url": "https://data.buienradar.nl/1.0/feed/xml/rssbuienradar",
61+
"date_format": "%Y-%m-%d %H:%M:%S.%f",
62+
},
63+
},
64+
{
65+
"has_images": False,
66+
"has_unique_links": False,
67+
"sensor_config": {
68+
"name": "skolmaten_se_ede_skola",
69+
"feed_url": "https://skolmaten.se/ede-skola/rss/weeks/?limit=2",
70+
"inclusions": ["title", "link", "published", "summary"],
71+
},
72+
},
73+
{
74+
"has_images": False,
75+
"sensor_config": {
76+
"name": "api_met_no_metalerts",
77+
"feed_url": "https://api.met.no/weatherapi/metalerts/1.1/",
78+
"inclusions": ["title", "link", "published", "summary"],
79+
},
80+
},
81+
{
82+
"has_images": True,
83+
"has_unique_images": False,
84+
"has_unique_titles": False,
85+
"sensor_config": {
86+
"name": "anp_nieuws",
87+
"feed_url": "https://www.omnycontent.com/d/playlist/56ccbbb7-0ff7-4482-9d99-a88800f49f6c/a49c87f6-d567-4189-8692-a8e2009eaf86/9fea2041-fccd-4fcf-8cec-a8e2009eeca2/podcast.rss",
88+
"inclusions": ["title", "link", "published", "summary"],
89+
},
90+
},
5491
]
5592

5693
DEFAULT_EXCLUSIONS: list[str] = []

tests/data/anp_nieuws.json

+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
{
2+
"has_images": true,
3+
"has_unique_images": false,
4+
"has_unique_titles": false,
5+
"sensor_config": {
6+
"name": "anp_nieuws",
7+
"feed_url": "https://www.omnycontent.com/d/playlist/56ccbbb7-0ff7-4482-9d99-a88800f49f6c/a49c87f6-d567-4189-8692-a8e2009eaf86/9fea2041-fccd-4fcf-8cec-a8e2009eeca2/podcast.rss",
8+
"inclusions": [
9+
"title",
10+
"link",
11+
"published",
12+
"summary"
13+
]
14+
},
15+
"download_date": "2023-08-18T09:22:14.164244+00:00"
16+
}

0 commit comments

Comments
 (0)