Skip to content

Commit b520327

Browse files
author
Brazda, Tessa M. (JSC-IO111)[EAST2]
committed
added function to differentiate between UTF-8 and UTF-16 text formatting point#3
1 parent 4953ca6 commit b520327

1 file changed

Lines changed: 22 additions & 2 deletions

File tree

apod/utility.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
@author=bathomas @email=brian.a.thomas@nasa.gov
77
"""
88

9+
import codecs
910
import datetime
1011
import json
1112
import logging
@@ -26,6 +27,25 @@
2627
# Create urllib3 Pool Manager
2728
http = urllib3.PoolManager()
2829

30+
# function for decoding response bytes (UTF-8 or UTF-16) into text
31+
def _decode_response_text(res):
    """
    Decode APOD response bytes defensively because APOD occasionally serves
    UTF-16 content while declaring UTF-8 in headers.

    Resolution order:

    1. A UTF-16 byte-order mark at the start of the body.
    2. The detector's guess (``res.apparent_encoding``) when it names any
       UTF-16 variant.
    3. ``res.text`` as already decoded by the response object.
    4. A lossy UTF-8 decode of the raw bytes.

    :param res: response object (as returned by ``requests.get``) exposing
        ``content``, ``apparent_encoding`` and ``text``.
    :return: the decoded body as ``str``; undecodable bytes are replaced
        rather than raising.
    """
    content = res.content or b""

    # The generic "utf-16" codec reads the BOM to choose the byte order and
    # strips it from the decoded text.
    if content.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
        return content.decode("utf-16", errors="replace")

    # Detectors spell encoding names inconsistently ("UTF-16", "UTF-16BE",
    # "utf_16_le", ...), so normalise through the codec registry instead of
    # comparing raw strings.
    apparent = res.apparent_encoding or ""
    codec_name = ""
    if apparent:
        try:
            codec_name = codecs.lookup(apparent).name
        except LookupError:
            # Unknown or malformed guess: fall through to the other strategies.
            codec_name = ""

    if codec_name.startswith("utf-16"):
        # Decode with the detected variant. Without a BOM the plain "utf-16"
        # codec assumes the host's native byte order, which would garble
        # big-endian content on little-endian machines.
        return content.decode(codec_name, errors="replace")

    if res.text:
        return res.text

    return content.decode("utf-8", errors="replace")
2949

3050
# function for getting video thumbnails
3151
def _get_thumbs(data):
@@ -75,7 +95,7 @@ def _get_apod_chars(dt, thumbs):
7595
apod_url = "%sastropix.html" % BASE
7696
LOG.debug("OPENING URL:" + apod_url)
7797
res = requests.get(apod_url)
78-
98+
page_text = _decode_response_text(res)
7999
if res.status_code == 404:
80100
return None
81101
# LOG.error(f'No APOD entry for URL: {apod_url}')
@@ -88,7 +108,7 @@ def _get_apod_chars(dt, thumbs):
88108

89109
# return default_obj_props
90110

91-
soup = BeautifulSoup(res.text, "html.parser")
111+
soup = BeautifulSoup(page_text, "html.parser")
92112
LOG.debug("getting the data url")
93113
hd_data = None
94114
if soup.img:

0 commit comments

Comments
 (0)