3
3
4
4
import email .utils
5
5
import logging
6
+ import re
6
7
from datetime import datetime , timedelta , timezone
7
8
from typing import TYPE_CHECKING
8
9
@@ -173,11 +174,19 @@ def _generate_sensor_entry(
173
174
if key in ["published" , "updated" , "created" , "expired" ]:
174
175
parsed_date : datetime = self ._parse_date (value )
175
176
sensor_entry [key ] = parsed_date .strftime (self ._date_format )
177
+ elif key == "image" :
178
+ sensor_entry ["image" ] = value .get ("href" )
176
179
else :
177
180
sensor_entry [key ] = value
178
181
179
- self ._process_image (feed_entry , sensor_entry )
180
-
182
+ if "image" in self ._inclusions and "image" not in sensor_entry :
183
+ sensor_entry ["image" ] = self ._process_image (feed_entry )
184
+ if (
185
+ "link" in self ._inclusions
186
+ and "link" not in sensor_entry
187
+ and (processed_link := self ._process_link (feed_entry ))
188
+ ):
189
+ sensor_entry ["link" ] = processed_link
181
190
_LOGGER .debug ("Feed %s: Generated sensor entry: %s" , self .name , sensor_entry )
182
191
return sensor_entry
183
192
@@ -194,42 +203,63 @@ def _parse_date(self: FeedParserSensor, date: str) -> datetime:
194
203
self .name ,
195
204
date ,
196
205
)
206
+ # best effort to parse the date using dateutil
207
+ parsed_time = parser .parse (date )
208
+
209
+ if not parsed_time .tzinfo :
210
+ # best effort to parse the date using dateutil
197
211
parsed_time = parser .parse (date )
198
- if not parsed_time .tzname () :
199
- # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
200
- parsed_time = parsed_time . replace (
201
- tzinfo = timezone ( parsed_time . utcoffset ()), # type: ignore[arg-type]
212
+ if not parsed_time .tzinfo :
213
+ msg = (
214
+ f"Feed { self . name } : Unable to parse date { date } , "
215
+ "caused by an incorrect date format"
202
216
)
217
+ raise ValueError (msg )
218
+ if not parsed_time .tzname ():
219
+ # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
220
+ parsed_time = parsed_time .replace (
221
+ tzinfo = timezone (parsed_time .utcoffset ()), # type: ignore[arg-type]
222
+ )
223
+
203
224
if self ._local_time :
204
225
parsed_time = dt .as_local (parsed_time )
205
226
_LOGGER .debug ("Feed %s: Parsed date: %s" , self .name , parsed_time )
206
227
return parsed_time
207
228
208
- def _process_image (
209
- self : FeedParserSensor ,
210
- feed_entry : FeedParserDict ,
211
- sensor_entry : dict [str , str ],
212
- ) -> None :
213
- if "image" in self ._inclusions and "image" not in sensor_entry .keys ():
214
- if "enclosures" in feed_entry :
215
- images = [
216
- enc
217
- for enc in feed_entry ["enclosures" ]
218
- if enc .type .startswith ("image/" )
219
- ]
220
- else :
221
- images = []
229
def _process_image(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the URL of the first image found for a feed entry.

    Lookup order:

    1. the first enclosure whose MIME type starts with ``image/`` and that
       carries a non-empty ``href``;
    2. the ``src`` of the first ``<img>`` tag in the entry ``summary``;
    3. ``DEFAULT_THUMBNAIL`` when neither of the above yields an image.

    The summary is consulted even when enclosures exist but none of them is
    an image, so an audio/video enclosure does not hide an inline image.
    """
    # Enclosures may lack "type" or "href"; read both defensively so a
    # malformed enclosure is skipped instead of raising.
    for enclosure in feed_entry.get("enclosures") or []:
        mime_type = enclosure.get("type") or ""
        href = enclosure.get("href")
        if mime_type.startswith("image/") and href:
            # pick the first image found
            return href

    # Restrict the match to a single <img ...> tag ([^>]*), require
    # whitespace before "src" so "data-src" is not picked up, and accept
    # both quote styles.
    summary = feed_entry.get("summary") or ""
    img_match = re.search(
        r"<img\b[^>]*?\ssrc\s*=\s*[\"']([^\"']+)[\"']",
        summary,
        re.IGNORECASE,
    )
    if img_match:
        # pick the first image found
        return img_match.group(1)

    _LOGGER.debug(
        "Feed %s: Image is in inclusions, but no image was found for %s",
        self.name,
        feed_entry,
    )
    return DEFAULT_THUMBNAIL  # use default image if no image found
251
+
252
def _process_link(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the first link of a feed entry, or an empty string.

    An empty string is returned when the entry has no ``links`` key, when
    the list is empty, or when the first link has no ``href``. A warning is
    logged when more than one link is present, since only the first is used.
    """
    links = feed_entry.get("links") or []
    if not links:
        # Covers both a missing key and an empty list; indexing [0] on an
        # empty list would raise IndexError.
        return ""
    if len(links) > 1:
        _LOGGER.warning(
            "Feed %s: More than one link found for %s. Using the first link.",
            self.name,
            feed_entry,
        )
    # A link without "href" yields "" rather than a KeyError.
    return links[0].get("href") or ""
233
263
234
264
@property
235
265
def feed_entries (self : FeedParserSensor ) -> list [dict [str , str ]]:
0 commit comments