diff --git a/vbscraper.py b/vbscraper.py index c314e02..f8837b8 100644 --- a/vbscraper.py +++ b/vbscraper.py @@ -15,6 +15,7 @@ import os import os.path +html_parser = "html.parser" # always use Python's built-in HTML parser PostboxDocument = collections.namedtuple( 'PostboxDocument', @@ -72,7 +73,7 @@ def login(self, username): ) # Parse returned page - soup = bs4.BeautifulSoup(r.text) + soup = bs4.BeautifulSoup(r.text, html_parser) if not soup.find(text='Finanzstatus'): raise RuntimeError('Login to Volksbank Online Banking failed.') @@ -111,7 +112,7 @@ def postbox_items(self): r = self.s.get(self.base_url + self.postbox_url) # Parse Postbox page - soup = bs4.BeautifulSoup(r.text) + soup = bs4.BeautifulSoup(r.text, html_parser) # Get number of pages li = soup.find('li', attrs={'class': 'gad-paginationActivePageNumber'}) @@ -181,7 +182,7 @@ def postbox_items(self): r = self.s.get(self.base_url + a_next_page['href']) # Parse Postbox page - soup = bs4.BeautifulSoup(r.text) + soup = bs4.BeautifulSoup(r.text, html_parser) return ret @@ -197,7 +198,7 @@ def download_document(self, document, destinations): r = self.s.get(self.base_url + self.postbox_url) # Parse Postbox page - soup = bs4.BeautifulSoup(r.text) + soup = bs4.BeautifulSoup(r.text, html_parser) # Get current page number li = soup.find( @@ -227,7 +228,7 @@ def download_document(self, document, destinations): r = self.s.get(self.base_url + document.url) # parse message page - soup = bs4.BeautifulSoup(r.text) + soup = bs4.BeautifulSoup(r.text, html_parser) #subject = msg_soup.find( # 'label', attrs={'for': 'messageSenderSubject'}