66@author=bathomas @email=brian.a.thomas@nasa.gov
77"""
88
9+ import codecs
910import datetime
1011import json
1112import logging
2627# Create urllib3 Pool Manager
2728http = urllib3 .PoolManager ()
2829
30+ # function for decoding response bytes (UTF-8 or UTF-16 encoded) into text
31+ def _decode_response_text (res ):
32+ """
33+ Decode APOD response bytes defensively because APOD occasionally serves
34+ UTF-16 content while declaring UTF-8 in headers.
35+ """
36+ content = res .content or b""
37+
38+ if content .startswith (codecs .BOM_UTF16_LE ) or content .startswith (codecs .BOM_UTF16_BE ):
39+ return content .decode ("utf-16" , errors = "replace" )
40+
41+ apparent = (res .apparent_encoding or "" ).lower ()
42+ if apparent .startswith ("utf-16" ):
43+ return content .decode ("utf-16" , errors = "replace" )
44+
45+ if res .text :
46+ return res .text
47+
48+ return content .decode ("utf-8" , errors = "replace" )
2949
3050# function for getting video thumbnails
3151def _get_thumbs (data ):
@@ -75,7 +95,7 @@ def _get_apod_chars(dt, thumbs):
7595 apod_url = "%sastropix.html" % BASE
7696 LOG .debug ("OPENING URL:" + apod_url )
7797 res = requests .get (apod_url )
78-
98+ page_text = _decode_response_text ( res )
7999 if res .status_code == 404 :
80100 return None
81101 # LOG.error(f'No APOD entry for URL: {apod_url}')
@@ -88,7 +108,7 @@ def _get_apod_chars(dt, thumbs):
88108
89109 # return default_obj_props
90110
91- soup = BeautifulSoup (res . text , "html.parser" )
111+ soup = BeautifulSoup (page_text , "html.parser" )
92112 LOG .debug ("getting the data url" )
93113 hd_data = None
94114 if soup .img :
0 commit comments