Skip to content

Commit 41e5680

Browse files
fix proxyhub with latest html structure parser
1 parent 621583a commit 41e5680

1 file changed

Lines changed: 16 additions & 9 deletions

File tree

freeproxy/modules/proxies/proxyhub.py

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import re
1010
import requests
1111
from bs4 import BeautifulSoup
12+
from urllib.parse import urljoin
1213
from .base import BaseProxiedSession
1314
from ..utils import filterinvalidproxies, applyfilterrule, ProxyInfo
1415

@@ -24,19 +25,25 @@ def __init__(self, **kwargs):
2425
@filterinvalidproxies
2526
def refreshproxies(self):
2627
# initialize
27-
self.candidate_proxies, session, urls = [], requests.Session(), []
28-
# obtain proxies
29-
try: (resp := session.get('https://proxyhub.me/', headers=self.getrandomheaders())).raise_for_status(); soup = BeautifulSoup(resp.text, 'lxml'); soup = soup.select_one("div.list table.table"); trs = soup.select("tbody tr")
30-
except Exception: return self.candidate_proxies
31-
for tr in trs:
32-
try: tds = tr.find_all("td"); urls.append(tds[4].find("a")['href'])
28+
self.candidate_proxies, session, urls, headers = [], requests.Session(), [], self.getrandomheaders()
29+
# obtain country urls
30+
(resp := session.get('https://proxyhub.me/', headers=headers, timeout=60)).raise_for_status()
31+
if not (table := BeautifulSoup(resp.text, 'lxml').select_one(".list table.table")): return self.candidate_proxies
32+
for tr in table.select("tbody tr"):
33+
try: tds = tr.find_all("td"); urls.append(tds[0].find("a")['href'])
3334
except Exception: continue
3435
if not (urls := list(set(urls))): return self.candidate_proxies
36+
# obtain proxies
3537
for url in urls:
36-
try: (resp := session.get(f'https://proxyhub.me{url}')).raise_for_status(); soup = BeautifulSoup(resp.text, 'lxml'); soup = soup.select_one("div.list table.table"); trs = soup.select("tbody tr"); m = re.search(r"/en/([a-z]{2})-free-proxy-list(?:\.html?)?$", url, re.IGNORECASE); country_code = m.group(1).upper()
37-
except Exception: continue
38+
try:
39+
(resp := session.get(urljoin('https://proxyhub.me/', url), headers=headers, timeout=60)).raise_for_status()
40+
if not (table := BeautifulSoup(resp.text, 'lxml').select_one(".list table.table")): continue
41+
if not (m := re.search(r"/en/([a-z]{2})-free-proxy-list(?:\.html?)?$", url, re.IGNORECASE)): continue
42+
country_code = m.group(1).upper(); trs = table.select("tbody tr")
43+
except Exception:
44+
continue
3845
for tr in trs:
39-
try: tds = tr.find_all("td"); proxy_info = ProxyInfo(source=self.source, protocol=tds[2].get_text(strip=True).strip().lower(), ip=tds[0].get_text(strip=True).strip(), port=tds[1].get_text(strip=True).strip(), anonymity=tds[3].get_text(strip=True).strip().lower(), country_code=country_code, in_chinese_mainland=(country_code.lower() in ['cn']))
46+
try: tds = tr.find_all("td"); proxy_info = ProxyInfo(source=self.source, protocol=tds[3].get_text(strip=True).strip().lower(), ip=tds[1].get_text(strip=True).strip(), port=tds[2].get_text(strip=True).strip(), anonymity=tds[4].get_text(strip=True).strip().lower(), country_code=country_code, in_chinese_mainland=(country_code.lower() in ['cn']))
4047
except Exception: continue
4148
self.candidate_proxies.append(proxy_info)
4249
# return

0 commit comments

Comments (0)