Skip to content
7 changes: 7 additions & 0 deletions src/nwb_benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# Determine the path for running tshark
TSHARK_PATH = os.environ.get("TSHARK_PATH", None)
NETWORK_INTERFACE = os.environ.get("NWB_BENCHMARKS_NETWORK_INTERFACE", None)

# Opt-in switch for the benchmarks that download entire test files.
# The raw environment value is normalized to a real bool: an unset or empty variable, or an
# explicit "off" spelling (case-insensitive "0", "false", "f", "no", "n", "off"), disables the
# benchmarks. Any other non-empty value still enables them, as it did before.
# Without this normalization, RUN_DOWNLOAD_BENCHMARKS=false is a non-empty (truthy) string and
# would silently turn the slow downloads on.
RUN_DOWNLOAD_BENCHMARKS = os.environ.get("RUN_DOWNLOAD_BENCHMARKS", "").strip().lower() not in (
    "",
    "0",
    "false",
    "f",
    "no",
    "n",
    "off",
)

if TSHARK_PATH is None:
TSHARK_PATH = shutil.which("tshark")
Expand All @@ -20,8 +21,14 @@
warnings.warn("NWB_BENCHMARKS_NETWORK_INTERFACE not found. Set it in the environment.")
print(f"Using tshark at: {TSHARK_PATH} on {NETWORK_INTERFACE}")

# Announce when the package is imported that the slow, full-file download benchmarks are enabled.
if RUN_DOWNLOAD_BENCHMARKS:
    warnings.warn(
        message=(
            "RUN_DOWNLOAD_BENCHMARKS is set. "
            "Benchmarks that download the entire test file will be run, which may take a long time."
        )
    )

# Names exported by ``from nwb_benchmarks import *``.
__all__ = ["main", "TSHARK_PATH", "NETWORK_INTERFACE", "RUN_DOWNLOAD_BENCHMARKS"]
84 changes: 84 additions & 0 deletions src/nwb_benchmarks/benchmarks/params_remote_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from nwb_benchmarks.core import get_https_url

# Similar to params_remote_file_reading.py, but with follow_redirects=False because download does not work with a redirected URL.

# Benchmark parameters for the HDF5 test files in dandiset 000717.
# Each entry pairs a test-case name with the asset URL resolved without following redirects.
hdf5_params = tuple(
    {
        "name": case_name,
        "https_url": get_https_url(dandiset_id="000717", dandi_path=asset_path, follow_redirects=False),
    }
    for case_name, asset_path in (
        ("EcephysTestCase", "sub-npI3/sub-npI3_behavior+ecephys.nwb"),
        (
            "OphysTestCase",
            "sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb",
        ),
        (
            "IcephysTestCase",
            "sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb",
        ),
    )
)

# Benchmark parameters for the Zarr test stores in dandiset 000719.
# Each entry pairs a test-case name with the asset URL resolved without following redirects.
zarr_params = tuple(
    {
        "name": case_name,
        "https_url": get_https_url(dandiset_id="000719", dandi_path=asset_path, follow_redirects=False),
    }
    for case_name, asset_path in (
        ("EcephysTestCase", "sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr"),
        ("OphysTestCase", "sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr"),
        (
            "IcephysTestCase",
            "icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr",
        ),
    )
)

# Benchmark parameters for the LINDI JSON reference files in dandiset 213889.
# Each entry pairs a test-case name with the asset URL resolved without following redirects.
lindi_remote_rfs_params = tuple(
    {
        "name": case_name,
        "https_url": get_https_url(dandiset_id="213889", dandi_path=asset_path, follow_redirects=False),
    }
    for case_name, asset_path in (
        ("EcephysTestCase", "sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json"),
        (
            "OphysTestCase",
            "sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
        ),
        (
            "IcephysTestCase",
            "sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
        ),
    )
)
67 changes: 67 additions & 0 deletions src/nwb_benchmarks/benchmarks/time_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Basic benchmarks for timing download of remote NWB files using different methods."""

import shutil

from asv_runner.benchmarks.mark import skip_benchmark_if
from dandi.download import download

from nwb_benchmarks import RUN_DOWNLOAD_BENCHMARKS
from nwb_benchmarks.core import BaseBenchmark
from nwb_benchmarks.setup import get_temporary_directory

from .params_remote_download import hdf5_params, lindi_remote_rfs_params, zarr_params


class BaseDownloadDandiAPIBenchmark(BaseBenchmark):
    """
    Common setup and teardown for benchmarks that time DANDI API downloads of remote NWB files.

    ``setup`` stores a temporary directory object on ``self.tmpdir``; ``teardown`` removes it.
    """

    def setup(self, params: dict[str, str]):
        """Create the temporary download location, then run the teardown cleanup on it."""
        self.tmpdir = get_temporary_directory()
        # NOTE(review): teardown deletes the directory that was just created; presumably the
        # download call recreates the output directory on demand - confirm.
        self.teardown(params)

    def teardown(self, params: dict[str, str]):
        """Delete the temporary download location if ``setup`` created one."""
        if not hasattr(self, "tmpdir"):
            # setup never assigned a directory, so there is nothing to remove.
            return
        shutil.rmtree(path=self.tmpdir.name, ignore_errors=True)
        self.tmpdir.cleanup()


class HDF5DownloadDandiAPIBenchmark(BaseDownloadDandiAPIBenchmark):
    """
    Benchmark the time taken to download remote HDF5 NWB files through the DANDI API.
    """

    params = hdf5_params

    # NOTE: this benchmark downloads the full file, which can take a long time, so it is skipped
    # unless it is explicitly enabled with RUN_DOWNLOAD_BENCHMARKS=true.
    @skip_benchmark_if(not RUN_DOWNLOAD_BENCHMARKS)
    def time_download_hdf5_dandi_api(self, params: dict[str, str]):
        """Fetch the HDF5 NWB file at ``params["https_url"]`` into the temporary directory."""
        source_url = params["https_url"]
        target_dir = self.tmpdir.name
        download(urls=source_url, output_dir=target_dir)


class ZarrDownloadDandiAPIBenchmark(BaseDownloadDandiAPIBenchmark):
    """
    Benchmark the time taken to download remote Zarr NWB files through the DANDI API.
    """

    params = zarr_params

    # NOTE: this benchmark downloads the full file, which can take a long time, so it is skipped
    # unless it is explicitly enabled with RUN_DOWNLOAD_BENCHMARKS=true.
    @skip_benchmark_if(not RUN_DOWNLOAD_BENCHMARKS)
    def time_download_zarr_dandi_api(self, params: dict[str, str]):
        """Fetch the Zarr NWB directory at ``params["https_url"]`` into the temporary directory."""
        source_url = params["https_url"]
        target_dir = self.tmpdir.name
        download(urls=source_url, output_dir=target_dir)


class LindiDownloadDandiAPIBenchmark(BaseDownloadDandiAPIBenchmark):
    """
    Benchmark the time taken to download a remote LINDI JSON file through the DANDI API.

    This benchmark is not gated by RUN_DOWNLOAD_BENCHMARKS.
    NOTE(review): presumably because the LINDI JSON files are small - confirm.
    """

    params = lindi_remote_rfs_params

    def time_download_lindi_dandi_api(self, params: dict[str, str]):
        """Fetch the LINDI file at ``params["https_url"]`` into the temporary directory."""
        source_url = params["https_url"]
        target_dir = self.tmpdir.name
        download(urls=source_url, output_dir=target_dir)
28 changes: 0 additions & 28 deletions src/nwb_benchmarks/benchmarks/time_download_lindi.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/nwb_benchmarks/core/_dandi.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dandi.dandiapi import DandiAPIClient


def get_https_url(dandiset_id: str, dandi_path: str) -> str:
def get_https_url(dandiset_id: str, dandi_path: str, follow_redirects: bool | int = 1) -> str:
"""
Helper function to get S3 url form that fsspec/remfile expect from basic info about a file on DANDI.

Expand All @@ -25,5 +25,5 @@ def get_https_url(dandiset_id: str, dandi_path: str) -> str:
dandiset = client.get_dandiset(dandiset_id=dandiset_id)
asset = dandiset.get_asset_by_path(path=dandi_path)

https_url = asset.get_content_url(follow_redirects=1, strip_query=True)
https_url = asset.get_content_url(follow_redirects=follow_redirects, strip_query=True)
return https_url
Loading