1+ import os
12from pathlib import Path
23
34import pytest
@@ -13,13 +14,15 @@ def __init__(self, name, generation, metadata=None, contents=b"dataset"):
1314 self .metadata = metadata
1415 self .contents = contents
1516 self .download_count = 0
17+ self .download_filenames = []
1618 self .reload_count = 0
1719
def reload(self):
    """Record one reload request by incrementing ``reload_count``."""
    self.reload_count += 1
2022
def download_to_filename(self, filename):
    """Write this fake blob's ``contents`` to ``filename`` and record the call.

    Each call increments ``download_count`` and appends ``filename`` to
    ``download_filenames`` so tests can check how many downloads happened
    and which paths they targeted.
    """
    self.download_count += 1
    self.download_filenames.append(filename)
    Path(filename).write_bytes(self.contents)
2427
2528
@@ -187,3 +190,33 @@ def test_materialize_gcs_dataset_url_reuses_cached_file(monkeypatch, tmp_path):
187190 assert first_path == second_path
188191 assert Path (second_path ).read_bytes () == b"current"
189192 assert current_blob .download_count == 1
193+
194+
def test_download_blob_uses_unique_temp_path_for_each_download(monkeypatch, tmp_path):
    """Check that every ``_download_blob`` call stages through its own temp file.

    ``tempfile.mkstemp`` is replaced with a fake that hands out numbered
    paths and records them. After two downloads to the same destination the
    test expects: the blob was asked to download to exactly those recorded
    paths, the two paths differ, both sit in the destination's directory,
    neither still exists, and the destination holds the second payload.
    """
    local_path = tmp_path / "cache" / "file.h5"
    created_temp_paths = []

    # Keyword-only names mirror tempfile.mkstemp's parameters; presumably
    # _download_blob passes them by keyword -- confirm against its source.
    def fake_mkstemp(*, prefix, suffix, dir):
        # The running count makes each generated name distinct and predictable.
        temporary_path = Path(dir) / f"{prefix}{len(created_temp_paths)}{suffix}"
        # O_EXCL makes the open fail if the name already exists, so a reused
        # temp name would surface as an error instead of a silent overwrite.
        fd = os.open(temporary_path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0o600)
        created_temp_paths.append(temporary_path)
        return fd, str(temporary_path)

    monkeypatch.setattr(dataset_sources.tempfile, "mkstemp", fake_mkstemp)
    blob = FakeBlob("data/file.h5", 444, contents=b"first")

    dataset_sources._download_blob(blob, local_path)
    # Remove the result so the second call has to download again.
    local_path.unlink()
    blob.contents = b"second"
    dataset_sources._download_blob(blob, local_path)

    assert [
        Path(filename) for filename in blob.download_filenames
    ] == created_temp_paths
    assert len(set(created_temp_paths)) == 2
    assert all(
        temporary_path.parent == local_path.parent
        for temporary_path in created_temp_paths
    )
    assert all(not temporary_path.exists() for temporary_path in created_temp_paths)
    assert local_path.read_bytes() == b"second"
0 commit comments