andsor · moba · Feb 23, 2019
diff --git a/vbscraper.py b/vbscraper.py
@@ -15,6 +15,7 @@
 import os
 import os.path
 
+html_parser = "html.parser"  # pin Python's built-in HTML parser instead of letting BeautifulSoup pick one
 
 PostboxDocument = collections.namedtuple(
     'PostboxDocument',
@@ -72,7 +73,7 @@ def login(self, username):
         )
 
         # Parse returned page
-        soup = bs4.BeautifulSoup(r.text)
+        soup = bs4.BeautifulSoup(r.text, html_parser)
 
         if not soup.find(text='Finanzstatus'):
             raise RuntimeError('Login to Volksbank Online Banking failed.')
@@ -111,7 +112,7 @@ def postbox_items(self):
         r = self.s.get(self.base_url + self.postbox_url)
 
         # Parse Postbox page
-        soup = bs4.BeautifulSoup(r.text)
+        soup = bs4.BeautifulSoup(r.text, html_parser)
 
         # Get number of pages
         li = soup.find('li', attrs={'class': 'gad-paginationActivePageNumber'})
@@ -181,7 +182,7 @@ def postbox_items(self):
             r = self.s.get(self.base_url + a_next_page['href'])
 
             # Parse Postbox page
-            soup = bs4.BeautifulSoup(r.text)
+            soup = bs4.BeautifulSoup(r.text, html_parser)
 
         return ret
 
@@ -197,7 +198,7 @@ def download_document(self, document, destinations):
         r = self.s.get(self.base_url + self.postbox_url)
 
         # Parse Postbox page
-        soup = bs4.BeautifulSoup(r.text)
+        soup = bs4.BeautifulSoup(r.text, html_parser)
 
         # Get current page number
         li = soup.find(
@@ -227,7 +228,7 @@ def download_document(self, document, destinations):
         r = self.s.get(self.base_url + document.url)
 
         # parse message page
-        soup = bs4.BeautifulSoup(r.text)
+        soup = bs4.BeautifulSoup(r.text, html_parser)
 
         #subject = msg_soup.find(
         #    'label', attrs={'for': 'messageSenderSubject'}