@@ -12,12 +12,23 @@ def parse_price(price_raw: Optional[str]) -> Optional[int]:
     return int(price_cleaned) if price_cleaned else None
 
 def parse_page(url: str) -> Dict[str, Optional[any]]:
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+        'Sec-Fetch-Dest': 'document',
+        'Sec-Fetch-Mode': 'navigate',
+        'Sec-Fetch-Site': 'none',
+        'Sec-Fetch-User': '?1',
+    }
     logging.debug("Parsing page: %s", url)
-    response = requests.get(url)
+    response = requests.get(url, headers=headers)
     soup = BeautifulSoup(response.text, 'html.parser')
 
     listings = soup.select('section.re-layoutContentCenter')
-
     for listing in listings:
         city = listing.select_one('div.re-title__content span.re-blockTitle__location')
         neighbourhood = listing.select_one('div.re-title__content span.re-blockTitle__location:nth-of-type(2)')
@@ -53,12 +64,23 @@ def parse_page(url: str) -> Dict[str, Optional[any]]:
5364 "floor" : floor ,
5465 "garage_info" : garage_info ,
5566 }
56-
5767 return data
5868
5969def parse_listing (url : str ) -> List [Dict [str , Optional [any ]]]:
70+ headers = {
71+ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' ,
72+ 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8' ,
73+ 'Accept-Language' : 'en-US,en;q=0.9' ,
74+ 'Accept-Encoding' : 'gzip, deflate, br' ,
75+ 'Connection' : 'keep-alive' ,
76+ 'Upgrade-Insecure-Requests' : '1' ,
77+ 'Sec-Fetch-Dest' : 'document' ,
78+ 'Sec-Fetch-Mode' : 'navigate' ,
79+ 'Sec-Fetch-Site' : 'none' ,
80+ 'Sec-Fetch-User' : '?1' ,
81+ }
6082 logging .debug ("Fetching main listing page: %s" , url )
61- response = requests .get (url )
83+ response = requests .get (url , headers = headers )
6284 soup = BeautifulSoup (response .text , 'html.parser' )
6385 data_list = []
6486 links = soup .select ('a.in-listingCardTitle' )
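
The diff adds the same browser-style header dict inline in both `parse_page` and `parse_listing` so the requests look like an ordinary Chrome page load. A minimal sketch of one way to avoid that duplication, assuming nothing about the rest of the repo: hoist the headers into a module-level constant and send them through a shared `requests.Session`. The names `DEFAULT_HEADERS` and `fetch_html` below are hypothetical, not part of this change.

```python
import requests
from bs4 import BeautifulSoup

# Hypothetical constant: the browser-like headers from the diff, defined once.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
}

# A Session re-sends its default headers on every request and reuses the
# underlying TCP connection across successive requests.
session = requests.Session()
session.headers.update(DEFAULT_HEADERS)

def fetch_html(url: str) -> BeautifulSoup:
    # Shared fetch helper (hypothetical): both parse_page and parse_listing
    # could call this instead of building headers and calling requests.get
    # themselves.
    response = session.get(url, timeout=30)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
```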