Skip to content

Commit aa31c44

Browse files
katosh, flying-sheep, ilan-gold
authored
fix: use urlretrieve for reliable download in concat_on_disk doctest (scverse#2280)
Co-authored-by: Philipp A. <flying-sheep@web.de>
Co-authored-by: Ilan Gold <ilanbassgold@gmail.com>
1 parent 981174b commit aa31c44

3 files changed

Lines changed: 35 additions & 24 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,11 +96,10 @@ test-min = [
9696
"joblib",
9797
"boltons",
9898
"scanpy>=1.10",
99-
# TODO: Is 1.0dev1 a real pre-release? https://pypi.org/project/httpx/#history
100-
"httpx<1.0", # For data downloading
10199
"dask[distributed]",
102100
"awkward>=2.6.3",
103101
"pyarrow",
102+
"pooch",
104103
"anndata[dask]",
105104
]
106105

src/anndata/experimental/backed/_io.py

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -62,19 +62,25 @@ def read_lazy(
6262
Preparing example objects
6363
6464
>>> import anndata as ad
65-
>>> from urllib.request import urlretrieve
65+
>>> import pooch
6666
>>> import scanpy as sc
6767
>>> base_url = "https://datasets.cellxgene.cziscience.com"
68-
>>> def get_cellxgene_data(id_: str):
69-
... out_path = sc.settings.datasetdir / f"{id_}.h5ad"
70-
... if out_path.exists():
71-
... return out_path
72-
... file_url = f"{base_url}/{id_}.h5ad"
73-
... sc.settings.datasetdir.mkdir(parents=True, exist_ok=True)
74-
... urlretrieve(file_url, out_path)
75-
... return out_path
76-
>>> path_b_cells = get_cellxgene_data("a93eab58-3d82-4b61-8a2f-d7666dcdb7c4")
77-
>>> path_fetal = get_cellxgene_data("d170ff04-6da0-4156-a719-f8e1bbefbf53")
68+
>>> # To update hashes: pooch.retrieve(url, known_hash=None) prints the new hash
69+
>>> def get_cellxgene_data(id_: str, hash_: str):
70+
... return pooch.retrieve(
71+
... f"{base_url}/{id_}.h5ad",
72+
... known_hash=hash_,
73+
... fname=f"{id_}.h5ad",
74+
... path=sc.settings.datasetdir,
75+
... )
76+
>>> path_b_cells = get_cellxgene_data(
77+
... "a93eab58-3d82-4b61-8a2f-d7666dcdb7c4",
78+
... "sha256:dac90fe2aa8b78aee2c1fc963104592f8eff7b873ca21d01a51a5e416734651c",
79+
... )
80+
>>> path_fetal = get_cellxgene_data(
81+
... "d170ff04-6da0-4156-a719-f8e1bbefbf53",
82+
... "sha256:d497eebca03533919877b6fc876e8c9d8ba063199ddc86dd9fbcb9d1d87a3622",
83+
... )
7884
>>> b_cells_adata = ad.experimental.read_lazy(path_b_cells)
7985
>>> fetal_adata = ad.experimental.read_lazy(path_fetal)
8086
>>> print(b_cells_adata)

src/anndata/experimental/merge.py

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -556,19 +556,25 @@ def concat_on_disk( # noqa: PLR0913
556556
557557
First, let’s get some “big” datasets with a compatible ``var`` axis:
558558
559-
>>> import httpx
559+
>>> import pooch
560560
>>> import scanpy as sc
561561
>>> base_url = "https://datasets.cellxgene.cziscience.com"
562-
>>> def get_cellxgene_data(id_: str):
563-
... out_path = sc.settings.datasetdir / f'{id_}.h5ad'
564-
... if out_path.exists():
565-
... return out_path
566-
... file_url = f"{base_url}/{id_}.h5ad"
567-
... sc.settings.datasetdir.mkdir(parents=True, exist_ok=True)
568-
... out_path.write_bytes(httpx.get(file_url).content)
569-
... return out_path
570-
>>> path_b_cells = get_cellxgene_data('a93eab58-3d82-4b61-8a2f-d7666dcdb7c4')
571-
>>> path_fetal = get_cellxgene_data('d170ff04-6da0-4156-a719-f8e1bbefbf53')
562+
>>> # To update hashes: pooch.retrieve(url, known_hash=None) prints the new hash
563+
>>> def get_cellxgene_data(id_: str, hash_: str):
564+
... return pooch.retrieve(
565+
... f"{base_url}/{id_}.h5ad",
566+
... known_hash=hash_,
567+
... fname=f"{id_}.h5ad",
568+
... path=sc.settings.datasetdir,
569+
... )
570+
>>> path_b_cells = get_cellxgene_data(
571+
... 'a93eab58-3d82-4b61-8a2f-d7666dcdb7c4',
572+
... 'sha256:dac90fe2aa8b78aee2c1fc963104592f8eff7b873ca21d01a51a5e416734651c',
573+
... )
574+
>>> path_fetal = get_cellxgene_data(
575+
... 'd170ff04-6da0-4156-a719-f8e1bbefbf53',
576+
... 'sha256:d497eebca03533919877b6fc876e8c9d8ba063199ddc86dd9fbcb9d1d87a3622',
577+
... )
572578
573579
Now we can concatenate them on-disk:
574580

0 commit comments

Comments
 (0)