forked from yandex-cloud/yandex-ai-studio-sdk
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_download_datasets.py
More file actions
191 lines (150 loc) · 5.95 KB
/
test_download_datasets.py
File metadata and controls
191 lines (150 loc) · 5.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import tempfile
from pathlib import Path
import httpx
import pytest
from pytest_httpx import HTTPXMock
from yandex.cloud.ai.dataset.v1.dataset_pb2 import DatasetInfo
from yandex_cloud_ml_sdk._datasets.dataset import AsyncDataset
@pytest.fixture
def mock_dataset(mocker, tmp_path: Path) -> AsyncDataset:
    """Build an ``AsyncDataset`` backed by a mocked SDK.

    The SDK is a ``MagicMock`` whose ``_client.httpx()`` call returns a real
    ``httpx.AsyncClient``. The dataset is created from a ``DatasetInfo`` proto
    with ``dataset_id="id"``. ``tempfile.gettempdir`` is patched to return
    ``tmp_path`` (as a string), so code that resolves the temp directory
    through ``tempfile.gettempdir`` lands inside the per-test directory.
    """
    fake_sdk = mocker.MagicMock()
    http_client = httpx.AsyncClient()
    fake_sdk._client.httpx.return_value = http_client

    dataset_proto = DatasetInfo(dataset_id="id")
    result = AsyncDataset._from_proto(sdk=fake_sdk, proto=dataset_proto)

    # Redirect the system temp dir to pytest's per-test directory.
    mocker.patch("tempfile.gettempdir", return_value=str(tmp_path))
    return result
@pytest.mark.asyncio
async def test_download_to_temp_dir(mock_dataset, httpx_mock: HTTPXMock, mocker):
    """Download without ``download_path`` and check the default location.

    The expected directory is ``<tempdir>/ycml/datasets/<dataset id>``, where
    ``<tempdir>`` is whatever ``tempfile.gettempdir()`` returns at that point.
    """
    file_url = "https://example.com/file1.txt"
    payload = b"test file content"

    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=[("file1.txt", file_url)],
    )
    # Serve the file body for the single URL the dataset reports.
    httpx_mock.add_response(url=file_url, content=payload)

    downloaded = await mock_dataset.download(timeout=30)

    expected_dir = Path(tempfile.gettempdir()) / "ycml" / "datasets" / mock_dataset.id
    assert expected_dir.exists()
    assert downloaded == [expected_dir / "file1.txt"]
    assert downloaded[0].read_bytes() == payload
@pytest.mark.asyncio
async def test_download_to_custom_dir(mock_dataset, tmp_path, httpx_mock: HTTPXMock, mocker):
    """Download into an explicitly supplied, empty directory."""
    file_url = "https://example.com/file1.txt"
    payload = b"test file content"

    # The target directory must exist and be empty before the call.
    target_dir = tmp_path / "empty"
    target_dir.mkdir()

    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=[("file1.txt", file_url)],
    )
    httpx_mock.add_response(url=file_url, content=payload)

    downloaded = await mock_dataset.download(download_path=target_dir, timeout=30)

    assert downloaded == [target_dir / "file1.txt"]
    assert downloaded[0].read_bytes() == payload
@pytest.mark.asyncio
async def test_download_multiple_files(httpx_mock: HTTPXMock, mock_dataset, tmp_path, mocker):
    """Download two files at once and verify each one's name and content."""
    # File name -> body served for it; insertion order drives the setup below.
    expected_contents = {
        "file1.txt": b"content of file 1",
        "file2.txt": b"content of file 2",
    }

    target_dir = tmp_path / "empty"
    target_dir.mkdir()

    download_urls = [
        (file_name, f"https://example.com/{file_name}")
        for file_name in expected_contents
    ]
    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=download_urls,
    )

    # Register one mocked HTTP response per file.
    for file_name, file_url in download_urls:
        httpx_mock.add_response(url=file_url, content=expected_contents[file_name])

    outcome = await mock_dataset.download(download_path=target_dir, timeout=30)

    downloaded = list(outcome)
    assert len(downloaded) == 2
    assert {item.name for item in downloaded} == set(expected_contents)
    for file_name, body in expected_contents.items():
        assert (target_dir / file_name).read_bytes() == body
@pytest.mark.asyncio
async def test_download_to_non_existent_dir(mock_dataset, tmp_path, mocker):
    """A missing target directory must raise ``ValueError`` ("does not exist")."""
    # Never created on disk, so the path is guaranteed to be absent.
    missing_dir = tmp_path / "does_not_exist"

    download_urls = [("file1.txt", "https://example.com/file1.txt")]
    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=download_urls,
    )

    with pytest.raises(ValueError, match="does not exist"):
        await mock_dataset.download(download_path=missing_dir, timeout=30)
@pytest.mark.asyncio
async def test_download_to_file_path(mock_dataset, tmp_path, mocker):
    """A target that is a regular file must raise ``ValueError`` ("is not a directory")."""
    # An existing regular file stands in for the download directory.
    regular_file = tmp_path / "file.txt"
    regular_file.touch()

    download_urls = [("file1.txt", "https://example.com/file1.txt")]
    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=download_urls,
    )

    with pytest.raises(ValueError, match="is not a directory"):
        await mock_dataset.download(download_path=regular_file, timeout=30)
@pytest.mark.asyncio
async def test_download_to_non_empty_dir(mock_dataset, tmp_path, mocker):
    """A target directory that already has content must raise ``ValueError`` ("is not empty")."""
    # Seed the directory with one file so that it is not empty.
    occupied_dir = tmp_path / "non_empty"
    occupied_dir.mkdir()
    leftover = occupied_dir / "existing_file.txt"
    leftover.write_text("existing content")

    download_urls = [("file1.txt", "https://example.com/file1.txt")]
    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=download_urls,
    )

    with pytest.raises(ValueError, match="is not empty"):
        await mock_dataset.download(download_path=occupied_dir, timeout=30)
@pytest.mark.asyncio
async def test_download_http_error(httpx_mock: HTTPXMock, mock_dataset, tmp_path, mocker):
    """A 404 response for a file must surface as ``httpx.HTTPStatusError``."""
    file_url = "https://example.com/file1.txt"

    target_dir = tmp_path / "empty"
    target_dir.mkdir()

    mocker.patch.object(
        mock_dataset,
        "_get_download_urls",
        return_value=[("file1.txt", file_url)],
    )
    # The mocked server answers the file request with "not found".
    httpx_mock.add_response(url=file_url, status_code=404)

    with pytest.raises(httpx.HTTPStatusError):
        await mock_dataset.download(download_path=target_dir, timeout=30)