99import re
1010import requests
1111from bs4 import BeautifulSoup
12+ from urllib .parse import urljoin
1213from .base import BaseProxiedSession
1314from ..utils import filterinvalidproxies , applyfilterrule , ProxyInfo
1415
@@ -24,19 +25,25 @@ def __init__(self, **kwargs):
2425 @filterinvalidproxies
2526 def refreshproxies (self ):
2627 # initialize
27- self .candidate_proxies , session , urls = [], requests .Session (), []
28- # obtain proxies
29- try : (resp := session .get ('https://proxyhub.me/' , headers = self . getrandomheaders ())). raise_for_status (); soup = BeautifulSoup ( resp . text , 'lxml' ); soup = soup . select_one ( "div.list table.table" ); trs = soup . select ( "tbody tr" )
30- except Exception : return self .candidate_proxies
31- for tr in trs :
32- try : tds = tr .find_all ("td" ); urls .append (tds [4 ].find ("a" )['href' ])
28+ self .candidate_proxies , session , urls , headers = [], requests .Session (), [], self . getrandomheaders ()
29+ # obtain country urls
30+ (resp := session .get ('https://proxyhub.me/' , headers = headers , timeout = 60 )). raise_for_status ( )
31+ if not ( table := BeautifulSoup ( resp . text , 'lxml' ). select_one ( ".list table.table" )) : return self .candidate_proxies
32+ for tr in table . select ( "tbody tr" ) :
33+ try : tds = tr .find_all ("td" ); urls .append (tds [0 ].find ("a" )['href' ])
3334 except Exception : continue
3435 if not (urls := list (set (urls ))): return self .candidate_proxies
36+ # obtain proxies
3537 for url in urls :
36- try : (resp := session .get (f'https://proxyhub.me{ url } ' )).raise_for_status (); soup = BeautifulSoup (resp .text , 'lxml' ); soup = soup .select_one ("div.list table.table" ); trs = soup .select ("tbody tr" ); m = re .search (r"/en/([a-z]{2})-free-proxy-list(?:\.html?)?$" , url , re .IGNORECASE ); country_code = m .group (1 ).upper ()
37- except Exception : continue
38+ try :
39+ (resp := session .get (urljoin ('https://proxyhub.me/' , url ), headers = headers , timeout = 60 )).raise_for_status ()
40+ if not (table := BeautifulSoup (resp .text , 'lxml' ).select_one (".list table.table" )): continue
41+ if not (m := re .search (r"/en/([a-z]{2})-free-proxy-list(?:\.html?)?$" , url , re .IGNORECASE )): continue
42+ country_code = m .group (1 ).upper (); trs = table .select ("tbody tr" )
43+ except Exception :
44+ continue
3845 for tr in trs :
39- try : tds = tr .find_all ("td" ); proxy_info = ProxyInfo (source = self .source , protocol = tds [2 ].get_text (strip = True ).strip ().lower (), ip = tds [0 ].get_text (strip = True ).strip (), port = tds [1 ].get_text (strip = True ).strip (), anonymity = tds [3 ].get_text (strip = True ).strip ().lower (), country_code = country_code , in_chinese_mainland = (country_code .lower () in ['cn' ]))
46+ try : tds = tr .find_all ("td" ); proxy_info = ProxyInfo (source = self .source , protocol = tds [3 ].get_text (strip = True ).strip ().lower (), ip = tds [1 ].get_text (strip = True ).strip (), port = tds [2 ].get_text (strip = True ).strip (), anonymity = tds [4 ].get_text (strip = True ).strip ().lower (), country_code = country_code , in_chinese_mainland = (country_code .lower () in ['cn' ]))
4047 except Exception : continue
4148 self .candidate_proxies .append (proxy_info )
4249 # return
0 commit comments