Skip to content

Commit cb3ca7d

Browse files
authored
[Sofascore] Bypass TLS fingerprinting (#835)
1 parent 986df62 commit cb3ca7d

File tree

3 files changed

+45
-54
lines changed

3 files changed

+45
-54
lines changed

poetry.lock

Lines changed: 33 additions & 45 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ Changelog = "https://github.com/probberechts/soccerdata/releases"
2222
python = ">=3.9,<3.13"
2323
PySocks = "^1.7.1"
2424
Unidecode = "^1.2.0"
25-
cloudscraper = "^1.2.71"
2625
html5lib = "^1.1"
2726
pandas = "^2.0.0, !=2.1.0"
2827
requests = "^2.23"
@@ -32,6 +31,8 @@ unicode = "^2.7"
3231
lxml = "^4.9.3"
3332
socceraction = {version="^1.5.3", optional=true}
3433
packaging = "^24.1"
34+
wrapper-tls-requests = "^1.1.4"
35+
tqdm = "^4.67.1"
3536

3637
[tool.poetry.extras]
3738
socceraction = ["socceraction"]

soccerdata/_common.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@
1212
from pathlib import Path
1313
from typing import IO, Callable, Optional, Union
1414

15-
import cloudscraper
1615
import numpy as np
1716
import pandas as pd
1817
import requests
1918
import selenium
19+
import tls_requests
2020
import undetected_chromedriver as uc
2121
from dateutil.relativedelta import relativedelta
2222
from packaging import version
@@ -504,12 +504,14 @@ def __init__(
504504

505505
self._session = self._init_session()
506506

507-
def _init_session(self) -> requests.Session:
508-
session = cloudscraper.create_scraper(
509-
browser={"browser": "chrome", "platform": "linux", "mobile": False}
510-
)
511-
session.proxies.update(self.proxy())
512-
return session
507+
def _init_session(self) -> tls_requests.Client:
508+
proxy = self.proxy()
509+
proxy_url = None
510+
for protocol in ["https", "http"]:
511+
if protocol in proxy:
512+
proxy_url = proxy[protocol]
513+
break
514+
return tls_requests.Client(proxy=proxy_url)
513515

514516
def _download_and_save(
515517
self,
@@ -520,7 +522,7 @@ def _download_and_save(
520522
"""Download file at url to filepath. Overwrites if filepath exists."""
521523
for i in range(5):
522524
try:
523-
response = self._session.get(url, stream=True)
525+
response = self._session.get(url)
524526
time.sleep(self.rate_limit + random.random() * self.max_delay)
525527
response.raise_for_status()
526528
if var is not None:

0 commit comments

Comments (0)