|
2 | 2 |
|
3 | 3 | import json |
4 | 4 | from datetime import datetime, timezone |
| 5 | +from io import StringIO |
| 6 | +from unittest.mock import MagicMock, patch |
5 | 7 |
|
6 | 8 | import pandas as pd |
7 | 9 | import pytest |
|
20 | 22 | # _download_and_save |
21 | 23 |
|
22 | 24 |
|
23 | | -def test_download_and_save_not_cached(tmp_path): |
| 25 | +@pytest.fixture |
| 26 | +def mock_tls_client(): |
| 27 | + # Patch tls_requests.Client.get with a mock for the duration of each test |
| 28 | + # NOTE: the patch target must match the client the reader actually calls |
| 29 | + with patch("tls_requests.Client.get") as mock_get: |
| 30 | + |
| 31 | + def _return_csv(content="Rank,Club,Country\n1,Barcelona,ESP"): |
| 32 | + mock_resp = MagicMock() |
| 33 | + mock_resp.content = content.encode("utf-8") |
| 34 | + mock_resp.status_code = 200 |
| 35 | + mock_resp.raise_for_status = lambda: None |
| 36 | + mock_get.return_value = mock_resp |
| 37 | + return mock_get |
| 38 | + |
| 39 | + def _return_js_var(var_name="statData", data={"key": "value"}): |
| 40 | + """ |
| 41 | + Mimics: var name = JSON.parse('\x7b\x22key\x22\x3a\x22value\x22\x7d') |
| 42 | + The regex in the reader expects string-escaped content inside single quotes. |
| 43 | + """ |
| 44 | + # 1. Convert dict to JSON string |
| 45 | + json_str = json.dumps(data) |
| 46 | + # 2. Escape double quotes so it survives being wrapped in single quotes |
| 47 | + # and works with the reader's .decode("unicode_escape") |
| 48 | + escaped_json = json_str.replace('"', '\\"') |
| 49 | + |
| 50 | + html = f"var {var_name} = JSON.parse('{escaped_json}')" |
| 51 | + |
| 52 | + mock_resp = MagicMock() |
| 53 | + mock_resp.content = html.encode("utf-8") |
| 54 | + mock_resp.status_code = 200 |
| 55 | + mock_resp.raise_for_status = lambda: None |
| 56 | + mock_get.return_value = mock_resp |
| 57 | + return mock_get |
| 58 | + |
| 59 | + mock_get.return_csv = _return_csv |
| 60 | + mock_get.return_js_var = _return_js_var |
| 61 | + yield mock_get |
| 62 | + |
| 63 | + |
| 64 | +# --- Tests --- |
| 65 | + |
| 66 | + |
| 67 | +def test_download_and_save_not_cached(tmp_path, mock_tls_client): |
| 68 | + # Setup mock |
| 69 | + mock_tls_client.return_csv() |
| 70 | + |
24 | 71 | reader = BaseRequestsReader() |
25 | 72 | url = "http://api.clubelo.com/Barcelona" |
26 | 73 | filepath = tmp_path / "Barcelona.csv" |
27 | | - data = reader._download_and_save(url, filepath) |
| 74 | + data = reader.get(url, filepath) |
| 75 | + |
28 | 76 | assert isinstance(pd.read_csv(data), pd.DataFrame) |
| 77 | + assert filepath.exists() |
| 78 | + |
29 | 79 |
|
| 80 | +def test_download_and_save_cached(tmp_path, mock_tls_client): |
| 81 | + # Setup mock |
| 82 | + mock_tls_client.return_csv() |
30 | 83 |
|
31 | | -def test_download_and_save_cached(tmp_path): |
32 | 84 | reader = BaseRequestsReader() |
33 | 85 | url = "http://api.clubelo.com/Barcelona" |
34 | 86 | filepath = tmp_path / "Barcelona.csv" |
35 | | - data = reader._download_and_save(url, filepath) |
36 | | - data = reader._download_and_save(url, filepath) |
| 87 | + |
| 88 | + # First call: triggers the mock/download |
| 89 | + reader.get(url, filepath) |
| 90 | + # Second call: should read from disk |
| 91 | + data = reader.get(url, filepath) |
| 92 | + |
37 | 93 | assert isinstance(pd.read_csv(data), pd.DataFrame) |
| 94 | + # Verify the network was only hit once |
| 95 | + assert mock_tls_client.call_count == 1 |
| 96 | + |
38 | 97 |
|
| 98 | +def test_download_and_save_no_cache(tmp_path, mock_tls_client): |
| 99 | + # Setup mock with at least 2 rows of data |
| 100 | + mock_tls_client.return_csv("Col1,Col2\nVal1,Val2\nVal3,Val4") |
39 | 101 |
|
40 | | -def test_download_and_save_no_cache(tmp_path): |
41 | 102 | reader = BaseRequestsReader(no_cache=True) |
42 | 103 | url = "http://api.clubelo.com/Barcelona" |
43 | 104 | filepath = tmp_path / "Barcelona.csv" |
| 105 | + |
| 106 | + # Pre-populate with bogus data |
44 | 107 | filepath.write_text("bogus") |
45 | | - data = reader._download_and_save(url, filepath) |
46 | | - assert len(pd.read_csv(data)) > 1 |
47 | 108 |
|
| 109 | + data = reader.get(url, filepath) |
| 110 | + # If no_cache=True, it should have overwritten "bogus" with our 2-row CSV |
| 111 | + assert len(pd.read_csv(data)) >= 2 |
| 112 | + |
| 113 | + |
| 114 | +def test_download_and_save_no_store_no_filepath(mock_tls_client): |
| 115 | + # Setup mock |
| 116 | + mock_tls_client.return_csv() |
48 | 117 |
|
49 | | -def test_download_and_save_no_store_no_filepath(): |
50 | 118 | reader = BaseRequestsReader(no_store=True) |
51 | 119 | url = "http://api.clubelo.com/Barcelona" |
52 | | - data = reader._download_and_save(url, filepath=None) |
| 120 | + data = reader.get(url, filepath=None) |
| 121 | + |
53 | 122 | assert isinstance(pd.read_csv(data), pd.DataFrame) |
54 | 123 |
|
55 | 124 |
|
56 | | -def test_download_and_save_no_cache_filepath(tmp_path): |
| 125 | +def test_download_and_save_no_cache_filepath(tmp_path, mock_tls_client): |
| 126 | + # Setup mock |
| 127 | + mock_tls_client.return_csv() |
| 128 | + |
57 | 129 | reader = BaseRequestsReader(no_store=True) |
58 | 130 | url = "http://api.clubelo.com/Barcelona" |
59 | 131 | filepath = tmp_path / "Barcelona.csv" |
60 | | - data = reader._download_and_save(url, filepath) |
| 132 | + |
| 133 | + data = reader.get(url, filepath) |
| 134 | + |
61 | 135 | assert isinstance(pd.read_csv(data), pd.DataFrame) |
| 136 | + # no_store=True means the file should be deleted or never written |
62 | 137 | assert not filepath.exists() |
63 | 138 |
|
64 | 139 |
|
65 | | -def test_download_and_save_variable_no_store_no_filepath(): |
| 140 | +def test_download_and_save_variable_no_store_no_filepath(mock_tls_client): |
| 141 | + # Setup mock using the JS variable helper |
| 142 | + mock_tls_client.return_js_var(var_name="statData", data={"player": "Messi", "goals": 10}) |
| 143 | + |
66 | 144 | reader = BaseRequestsReader(no_store=True) |
67 | 145 | url = "https://understat.com/" |
68 | | - data = reader._download_and_save(url, filepath=None, var="statData") |
| 146 | + data = reader.get(url, filepath=None, var="statData") |
| 147 | + |
69 | 148 | stats = json.load(data) |
70 | 149 | assert isinstance(stats, dict) |
71 | | - assert "statData" in stats |
| 150 | + # the result is wrapped in {var_name: data} |
| 151 | + assert stats["statData"]["player"] == "Messi" |
72 | 152 |
|
73 | 153 |
|
74 | 154 | # def test_download_and_save_requests_tor(tmp_path): |
|
0 commit comments