probberechts · probberechts · Jan 16, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 7, 2026
diff --git a/.dvc/config b/.dvc/config
@@ -1,4 +1,5 @@
 [core]
     remote = myremote
+    autostage = true
 ['remote "myremote"']
     url = gs://soccerdata-test-data
diff --git a/Makefile b/Makefile
@@ -7,8 +7,6 @@
 # Rules can depend on other rules which run first. Rules with _ prefix are internal helpers.
 
 MODULE_NAME = soccerdata
-PYTHON_VERSION = 3.9
-PYTHON_INTERPRETER = python
 DOCS_PORT ?= 8000
 SOCCERDATA_DIR ?= tests/appdata
 .DEFAULT_GOAL := help
@@ -97,9 +95,6 @@ create-env: ## Set up python interpreter environment
 requirements: ## Install Python Dep
 	uv sync
 
-.PHONY: publish-all
-publish-all: format lint publish docs-publish ## Run format, lint, publish package and docs
-
 # ━━━━━━━━━━━━━━━━━━━━━━━━━━━ Pre-Commits ━━━━━━━━━━━━━━━━━━━━━━━━━━ #
 
 .PHONY: pre-commit-test pre-commit-update

diff --git a/soccerdata/clubelo.py b/soccerdata/clubelo.py
@@ -110,7 +110,9 @@ def read_by_date(self, date: Optional[Union[str, datetime]] = None) -> pd.DataFr
             .set_index("team")
         )
 
-    def read_team_history(self, team: str, max_age: Union[int, timedelta] = 1) -> pd.DataFrame:
+    def read_team_history(
+        self, team: str, max_age: Optional[Union[int, timedelta]] = 1
+    ) -> pd.DataFrame:
         """Retrieve full ELO history for one club.
 
         For the exact spelling of a club's name, check the result of
@@ -124,7 +126,8 @@ def read_team_history(self, team: str, max_age: Union[int, timedelta] = 1) -> pd
         team : str
             The club's name.
         max_age : int for age in days, or timedelta object
-            The max. age of locally cached file before re-download.
+            The max. age of locally cached file before re-download. To disable
+            re-downloading, set to None.
 
         Raises
         ------

diff --git a/soccerdata/fotmob.py b/soccerdata/fotmob.py
@@ -70,16 +70,14 @@ def __init__(
             (self.data_dir / "seasons").mkdir(parents=True, exist_ok=True)
             (self.data_dir / "matches").mkdir(parents=True, exist_ok=True)
 
-    def _init_session(self) -> tls_requests.Client:
-        session = super()._init_session()
+    def _init_session(self, headers: Optional[dict[str, str]] = None) -> tls_requests.Client:
         try:
             r = tls_requests.get("http://46.101.91.154:6006/")
             r.raise_for_status()
         except tls_requests.exceptions.HTTPError:
             raise ConnectionError("Unable to connect to the session cookie server.")
         result = r.json()
-        session.headers.update(result)
-        return session
+        return super()._init_session(headers=result)
 
     @property
     def leagues(self) -> list[str]:

diff --git a/soccerdata/understat.py b/soccerdata/understat.py
@@ -707,7 +707,9 @@ def _request_api(
         self, url: str, filepath: Optional[Path] = None, no_cache: bool = False
     ) -> IO[bytes]:
         """Make an API request with proper headers and caching."""
-        is_cached = filepath is not None and filepath.exists() and not no_cache and not self.no_cache
+        is_cached = (
+            filepath is not None and filepath.exists() and not no_cache and not self.no_cache
+        )
         if is_cached and filepath is not None:
             return filepath.open(mode="rb")
 
@@ -725,7 +727,7 @@ def _request_api(
     @staticmethod
     def _extract_team_name(html: str) -> str:
         """Extract team name from tmpl HTML."""
-        match = re.search(r'<h3><a[^>]*>([^<]+)</a></h3>', html)
+        match = re.search(r"<h3><a[^>]*>([^<]+)</a></h3>", html)
         if match:
             return match.group(1)
         return ""
@@ -756,4 +758,4 @@ def _as_str(value: Any) -> Optional[str]:
     try:
         return unescape(value)
     except (TypeError, ValueError):
-        return None
+        return None
diff --git a/tests/appdata/.gitignore b/tests/appdata/.gitignore
@@ -0,0 +1 @@
+/data
diff --git a/tests/appdata/data.dvc b/tests/appdata/data.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: 105c9a198a6d5dad8ef76ca820a3b3ed.dir
-  size: 242939408
-  nfiles: 1070
+- md5: 875464d3a845bdaee53c992de3e08ebe.dir
+  size: 232008858
+  nfiles: 885
   hash: md5
   path: data
diff --git a/tests/test_ClubElo.py b/tests/test_ClubElo.py
@@ -23,6 +23,7 @@ def _check_dataframe(self, df: pd.DataFrame) -> None:
         assert pd.api.types.is_datetime64_any_dtype(df["from"])
         assert pd.api.types.is_datetime64_any_dtype(df["to"])
 
+    @pytest.mark.fails_gha
     def test_default(self, elo: ClubElo) -> None:
         """It should return a dataframe with the latest ELO ratings if no date is given."""
         df = elo.read_by_date()
@@ -63,27 +64,27 @@ def _check_dataframe(self, df: pd.DataFrame) -> None:
 
     def test_with_valid_team(self, elo: ClubElo) -> None:
         """It should return a dataframe with the ELO history for the specified club."""
-        df = elo.read_team_history("Feyenoord")
+        df = elo.read_team_history("Feyenoord", max_age=None)
         self._check_dataframe(df)
 
     def test_with_teamname_replacements(self, elo: ClubElo) -> None:
         """It should use the replacement names from teamname_replacements.json."""
         # ClubElo uses "Man City" as the team name
-        df_original = elo.read_team_history("Man City")
-        df_replacement = elo.read_team_history("Manchester City")
+        df_original = elo.read_team_history("Man City", max_age=None)
+        df_replacement = elo.read_team_history("Manchester City", max_age=None)
         assert df_original.equals(df_replacement)
 
     def test_raises_when_team_not_found(self, elo: ClubElo) -> None:
         """It should raise an error if the team is not found."""
         with pytest.raises(ValueError, match="No data found for team FC Knudde"):
-            _ = elo.read_team_history("FC Knudde")
+            _ = elo.read_team_history("FC Knudde", max_age=None)
 
     def test_handles_special_characters_in_team_names(self, elo: ClubElo) -> None:
         """It should be able to deal with special characters in team names."""
-        df = elo.read_team_history("Brighton & Hove Albion")
+        df = elo.read_team_history("Brighton & Hove Albion", max_age=None)
         self._check_dataframe(df)
         with pytest.raises(ValueError, match="No data found for team Team & City"):
-            _ = elo.read_team_history("Team & City")
+            _ = elo.read_team_history("Team & City", max_age=None)
 
     @pytest.mark.fails_gha
     def test_respects_max_age_and_updates_cache(self, elo: ClubElo) -> None:

diff --git a/tests/test_Integration.py b/tests/test_Integration.py
@@ -1,9 +1,5 @@
 """Integration tests for soccerdata package."""
 
-import pandas as pd
-import pytest
-
-import soccerdata as foo
 
 # TODO: integration tests
 # Names of common leagues equal for all classes
@@ -13,21 +9,24 @@
 # Scores per game equal for all common leagues over classes
 
 
-@pytest.mark.e2e
-def test_mh_vs_elo():
-    """We should be able to retrieve the Elo history for all teams in these leagues."""
-    league_sel = [
-        "ENG-Premier League",
-        "ESP-La Liga",
-        "FRA-Ligue 1",
-        "GER-Bundesliga",
-        "ITA-Serie A",
-    ]
-
-    mh = foo.MatchHistory(leagues=league_sel, seasons="1819")
-    mh_games = mh.read_games()
-
-    elo = foo.ClubElo()
-    elo_hist = pd.concat([elo.read_team_history(team) for team in set(mh_games["home_team"])])
-
-    assert set(mh_games["home_team"]) - set(elo_hist["team"]) == set()
+# FIXME: disabled while ClubElo is flaky; re-enabling also requires restoring the removed imports
+# @pytest.mark.e2e
+# def test_mh_vs_elo():
+#     """We should be able to retrieve the Elo history for all teams in these leagues."""
+#     league_sel = [
+#         "ENG-Premier League",
+#         "ESP-La Liga",
+#         "FRA-Ligue 1",
+#         "GER-Bundesliga",
+#         "ITA-Serie A",
+#     ]
+#
+#     mh = foo.MatchHistory(leagues=league_sel, seasons="1819")
+#     mh_games = mh.read_games()
+#
+#     elo = foo.ClubElo()
+#     elo_hist = pd.concat(
+#         [elo.read_team_history(team, max_age=None) for team in set(mh_games["home_team"])]
+#     )
+#
+#     assert set(mh_games["home_team"]) - set(elo_hist["team"]) == set()
diff --git a/tests/test_common.py b/tests/test_common.py
@@ -2,6 +2,7 @@
 
 import json
 from datetime import datetime, timezone
+from unittest.mock import MagicMock, patch
 
 import pandas as pd
 import pytest
@@ -20,55 +21,133 @@
 # _download_and_save
 
 
-def test_download_and_save_not_cached(tmp_path):
+@pytest.fixture
+def mock_tls_client():
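+    # Patch tls_requests.Client.get so the tests receive a canned response, not a real HTTP call.
+    # The yielded mock exposes helpers (return_csv, return_js_var) that set the fake response.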
+    with patch("tls_requests.Client.get") as mock_get:
+
+        def _return_csv(content="Rank,Club,Country\n1,Barcelona,ESP"):
+            mock_resp = MagicMock()
+            mock_resp.content = content.encode("utf-8")
+            mock_resp.status_code = 200
+            mock_resp.raise_for_status = lambda: None
+            mock_get.return_value = mock_resp
+            return mock_get
+
+        def _return_js_var(var_name="statData", data={"key": "value"}):
+            """
+            Mimics: var name = JSON.parse('\x7b\x22key\x22\x3a\x22value\x22\x7d')
+            The regex in the reader expects string-escaped content inside single quotes.
+            """
+            # 1. Convert dict to JSON string
+            json_str = json.dumps(data)
+            # 2. Escape double quotes so it survives being wrapped in single quotes
+            # and works with the reader's .decode("unicode_escape")
+            escaped_json = json_str.replace('"', '\\"')
+
+            html = f"var {var_name} = JSON.parse('{escaped_json}')"
+
+            mock_resp = MagicMock()
+            mock_resp.content = html.encode("utf-8")
+            mock_resp.status_code = 200
+            mock_resp.raise_for_status = lambda: None
+            mock_get.return_value = mock_resp
+            return mock_get
+
+        mock_get.return_csv = _return_csv
+        mock_get.return_js_var = _return_js_var
+        yield mock_get
+
+
+# --- Tests ---
+
+
+def test_download_and_save_not_cached(tmp_path, mock_tls_client):
+    # Setup mock
+    mock_tls_client.return_csv()
+
     reader = BaseRequestsReader()
     url = "http://api.clubelo.com/Barcelona"
     filepath = tmp_path / "Barcelona.csv"
-    data = reader._download_and_save(url, filepath)
+    data = reader.get(url, filepath)
+
     assert isinstance(pd.read_csv(data), pd.DataFrame)
+    assert filepath.exists()
+
 
+def test_download_and_save_cached(tmp_path, mock_tls_client):
+    # Setup mock
+    mock_tls_client.return_csv()
 
-def test_download_and_save_cached(tmp_path):
     reader = BaseRequestsReader()
     url = "http://api.clubelo.com/Barcelona"
     filepath = tmp_path / "Barcelona.csv"
-    data = reader._download_and_save(url, filepath)
-    data = reader._download_and_save(url, filepath)
+
+    # First call: triggers the mock/download
+    reader.get(url, filepath)
+    # Second call: should read from disk
+    data = reader.get(url, filepath)
+
     assert isinstance(pd.read_csv(data), pd.DataFrame)
+    # Verify the network was only hit once
+    assert mock_tls_client.call_count == 1
+
 
+def test_download_and_save_no_cache(tmp_path, mock_tls_client):
+    # Setup mock with at least 2 rows of data
+    mock_tls_client.return_csv("Col1,Col2\nVal1,Val2\nVal3,Val4")
 
-def test_download_and_save_no_cache(tmp_path):
     reader = BaseRequestsReader(no_cache=True)
     url = "http://api.clubelo.com/Barcelona"
     filepath = tmp_path / "Barcelona.csv"
+
+    # Pre-populate with bogus data
     filepath.write_text("bogus")
-    data = reader._download_and_save(url, filepath)
-    assert len(pd.read_csv(data)) > 1
 
+    data = reader.get(url, filepath)
+    # If no_cache=True, it should have overwritten "bogus" with our 2-row CSV
+    assert len(pd.read_csv(data)) >= 2
+
+
+def test_download_and_save_no_store_no_filepath(mock_tls_client):
+    # Setup mock
+    mock_tls_client.return_csv()
 
-def test_download_and_save_no_store_no_filepath():
     reader = BaseRequestsReader(no_store=True)
     url = "http://api.clubelo.com/Barcelona"
-    data = reader._download_and_save(url, filepath=None)
+    data = reader.get(url, filepath=None)
+
     assert isinstance(pd.read_csv(data), pd.DataFrame)
 
 
-def test_download_and_save_no_cache_filepath(tmp_path):
+def test_download_and_save_no_cache_filepath(tmp_path, mock_tls_client):
+    # Setup mock
+    mock_tls_client.return_csv()
+
     reader = BaseRequestsReader(no_store=True)
     url = "http://api.clubelo.com/Barcelona"
     filepath = tmp_path / "Barcelona.csv"
-    data = reader._download_and_save(url, filepath)
+
+    data = reader.get(url, filepath)
+
     assert isinstance(pd.read_csv(data), pd.DataFrame)
+    # no_store=True means the file should be deleted or never written
     assert not filepath.exists()
 
 
-def test_download_and_save_variable_no_store_no_filepath():
+def test_download_and_save_variable_no_store_no_filepath(mock_tls_client):
+    # Setup mock using the JS variable helper
+    mock_tls_client.return_js_var(var_name="statData", data={"player": "Messi", "goals": 10})
+
     reader = BaseRequestsReader(no_store=True)
     url = "https://understat.com/"
-    data = reader._download_and_save(url, filepath=None, var="statData")
+    data = reader.get(url, filepath=None, var="statData")
+
     stats = json.load(data)
     assert isinstance(stats, dict)
-    assert "statData" in stats
+    # the result is wrapped in {var_name: data}
+    assert stats["statData"]["player"] == "Messi"
 
 
 # def test_download_and_save_requests_tor(tmp_path):