diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 57db378..be95376 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -1,7 +1,10 @@ name: Dev tests on: workflow_call: - secrets: + +env: + TSHARK_PATH: "" + NWB_BENCHMARKS_NETWORK_INTERFACE: "" jobs: @@ -13,27 +16,33 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false + defaults: + run: + shell: bash -el {0} steps: - - uses: actions/checkout@v4 - - run: git fetch --prune --unshallow --tags - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.13" - - - name: Global Setup + - name: Clone repository run: | - python -m pip install -U pip - pip install pytest - - name: Install local checkout - run: pip install --no-cache-dir . + git clone https://github.com/neurodatawithoutborders/nwb_benchmarks.git + cd nwb_benchmarks + BRANCH="${GITHUB_HEAD_REF:-$GITHUB_REF_NAME}" + git checkout $BRANCH + + - name: Install special conda environment + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: nwb_benchmarks + environment-file: nwb_benchmarks/environments/nwb_benchmarks.yaml + auto-activate-base: false - name: Display installed packages and their sources for debugging - run: pip list + run: | + conda info + conda list - name: Run basic file read test - run: nwb_benchmarks run --bench time_remote_file_reading.HDF5PyNWBRemfilePreloadedNoCacheFileReadBenchmark.time_read_hdf5_pynwb_remfile_preloaded_no_cache - + working-directory: nwb_benchmarks + run: nwb_benchmarks run --bench time_remote_file_reading.HDF5PyNWBRemfilePreloadedNoCacheFileReadBenchmark.time_read_hdf5_pynwb_remfile_preloaded_no_cache --debug - name: Run basic slicing test - run: nwb_benchmarks run --bench time_remote_slicing.HDF5PyNWBRemfilePreloadedNoCacheContinuousSliceBenchmark.time_slice + working-directory: nwb_benchmarks + run: nwb_benchmarks run --bench time_remote_slicing.HDF5PyNWBRemfilePreloadedNoCacheContinuousSliceBenchmark.time_slice --debug diff --git a/docs/writing_benchmarks.rst b/docs/writing_benchmarks.rst index e99b2a2..e77d8b1 100644 --- a/docs/writing_benchmarks.rst +++ b/docs/writing_benchmarks.rst @@ -60,13 +60,20 @@ An example of this philosophy in practice would be as follows. In this example w from nwb_benchmarks.core import read_hdf5_h5py_remfile_no_cache class HDF5H5pyFileReadBenchmark: - param_names = ["https_url"] - params = [ - "https://dandiarchive.s3.amazonaws.com/ros3test.nwb", # The original small test NWB file - ] + params = ( + dict( + name="EcephysTestCase", + https_url=get_https_url( + dandiset_id="000717", + dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", + ), + ), + ) - def time_read_hdf5_h5py_remfile_no_cache(self, https_url: str): - self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_h5py_remfile_no_cache(https_url=https_url) + def time_read_hdf5_h5py_remfile_no_cache(self, params: dict[str, str]): + """Read a remote HDF5 file using h5py and remfile without cache.""" + https_url = params["https_url"] + self.file, self.bytestream = read_hdf5_h5py_remfile_no_cache(https_url=https_url) as well as how long it takes to slice ~20 MB of data from the contents of a remote NWB file that has a large amount of series data... @@ -75,27 +82,33 @@ as well as how long it takes to slice ~20 MB of data from the contents of a remo from nwb_benchmarks.core import get_https_url, read_hdf5_pynwb_remfile_no_cache class HDF5PyNWBRemfileNoCacheContinuousSliceBenchmark: - param_names = ["https_url", "object_name", "slice_range"] params = ( - [ - get_https_url( # Yet another helper function for making the NWB file input easier to read + dict( + name="EcephysTestCase1", + https_url=get_https_url( dandiset_id="000717", - dandi_path="sub-IBL-ecephys/sub-IBL-ecephys_ses-3e7ae7c0_desc-18000000-frames-13653-by-384-chunking.nwb", - ) - ], - ["ElectricalSeriesAp"], - [(slice(0, 30_000), slice(0, 384))], # ~23 MB + dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", + ), + object_name="ElectricalSeries", + slice_range=(slice(0, 262_144), slice(0, 384)), # 12 chunks + ), ) - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data - def time_slice(self, https_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + def time_slice(self, params: dict[str, str | Tuple[slice]]): + """Slice a range of a dataset in a remote NWB file.""" + slice_range = params["slice_range"] + # Note: store as self._temp to avoid tracking garbage collection as well. self._temp = self.data_to_slice[slice_range] + Notice how the ``read_hdf5_pynwb_remfile_no_cache`` function (which reads an HDF5-backend ``pynwb.NWBFile`` object into memory using the ``remfile`` method) was used as both the main operation being timed in the first case, then reused in the ``setup`` of the second. By following the redirection of the function to its definition, we find it is itself a compound of another helper function for ``remfile`` usage... .. code-block:: python diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 61ca374..e072341 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -38,10 +38,10 @@ ) from .params_remote_file_reading import ( - lindi_hdf5_parameter_cases, - lindi_remote_rfs_parameter_cases, - parameter_cases, - zarr_parameter_cases, + hdf5_params, + lindi_hdf5_params, + lindi_remote_rfs_params, + zarr_params, ) @@ -54,10 +54,9 @@ class HDF5H5pyFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to be consistent with timing benchmarks. """ - parameter_cases = parameter_cases + params = hdf5_params - def teardown(self, https_url: str): - # Not all tests in the class are using a temporary dir as cache. Clean up if it does. + def teardown(self, params: dict[str, str]): if hasattr(self, "file"): self.file.close() if hasattr(self, "bytestream"): @@ -67,50 +66,57 @@ def teardown(self, https_url: str): self.tmpdir.cleanup() @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_fsspec_https_no_cache(self, https_url: str): + def track_network_read_hdf5_h5py_fsspec_https_no_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and fsspec with HTTPS without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream = read_hdf5_h5py_fsspec_https_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_fsspec_https_with_cache(self, https_url: str): + def track_network_read_hdf5_h5py_fsspec_https_with_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and fsspec with HTTPS with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_fsspec_https_with_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_fsspec_s3_no_cache(self, https_url: str): + def track_network_read_hdf5_h5py_fsspec_s3_no_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and fsspec with S3 without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream = read_hdf5_h5py_fsspec_s3_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_fsspec_s3_with_cache(self, https_url: str): + def track_network_read_hdf5_h5py_fsspec_s3_with_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and fsspec with S3 with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_fsspec_s3_with_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_remfile_no_cache(self, https_url: str): + def track_network_read_hdf5_h5py_remfile_no_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and remfile without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream = read_hdf5_h5py_remfile_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_remfile_with_cache(self, https_url: str): + def track_network_read_hdf5_h5py_remfile_with_cache(self, params: dict[str, str]): """Read remote HDF5 file using h5py and remfile with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_remfile_with_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_h5py_ros3(self, https_url: str): + def track_network_read_hdf5_h5py_ros3(self, params: dict[str, str]): """Read remote HDF5 file using h5py and the ROS3 HDF5 driver.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.file, _ = read_hdf5_h5py_ros3(https_url=https_url) return network_tracker.asv_network_statistics @@ -123,10 +129,9 @@ class HDF5PyNWBFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to be consistent with timing benchmarks. """ - parameter_cases = parameter_cases + params = hdf5_params - def teardown(self, https_url: str): - # Not all tests in the class are using a temporary dir as cache. Clean up if it does. + def teardown(self, params: dict[str, str]): if hasattr(self, "file"): self.file.close() if hasattr(self, "bytestream"): @@ -136,8 +141,9 @@ def teardown(self, https_url: str): self.tmpdir.cleanup() @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_https_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_https_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache( https_url=https_url @@ -145,8 +151,9 @@ def track_network_read_hdf5_pynwb_fsspec_https_no_cache(self, https_url: str): return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_https_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_https_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url @@ -154,17 +161,17 @@ def track_network_read_hdf5_pynwb_fsspec_https_with_cache(self, https_url: str): return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_s3_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_s3_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: - print("Starting read_hdf5_pynwb_fsspec_s3_no_cache", flush=True) self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) - print("Done read_hdf5_pynwb_fsspec_s3_no_cache", flush=True) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_s3_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_s3_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url @@ -172,15 +179,17 @@ def track_network_read_hdf5_pynwb_fsspec_s3_with_cache(self, https_url: str): return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_remfile_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_remfile_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_remfile_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_remfile_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile with cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url @@ -188,8 +197,9 @@ def track_network_read_hdf5_pynwb_remfile_with_cache(self, https_url: str): return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_ros3(self, https_url: str): + def track_network_read_hdf5_pynwb_ros3(self, params: dict[str, str]): """Read remote NWB file using pynwb and the ROS3 HDF5 driver.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, _ = read_hdf5_pynwb_ros3(https_url=https_url) return network_tracker.asv_network_statistics @@ -200,14 +210,16 @@ class HDF5PyNWBFsspecHttpsPreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with HTTPS with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_https_preloaded_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_https_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS with preloaded data without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache( https_url=https_url @@ -220,16 +232,18 @@ class HDF5PyNWBFsspecHttpsPreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with HTTPS with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_https_preloaded_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_https_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS with preloaded cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url @@ -242,14 +256,16 @@ class HDF5PyNWBFsspecS3PreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with S3 with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_s3_preloaded_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_s3_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 with preloaded data without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @@ -260,16 +276,18 @@ class HDF5PyNWBFsspecS3PreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with S3 with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_fsspec_s3_preloaded_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_fsspec_s3_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 with preloaded cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url @@ -282,14 +300,16 @@ class HDF5PyNWBRemfilePreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and remfile with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_remfile_preloaded_no_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_remfile_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile with preloaded data without cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) return network_tracker.asv_network_statistics @@ -300,16 +320,18 @@ class HDF5PyNWBRemfilePreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and remfile with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_hdf5_pynwb_remfile_preloaded_with_cache(self, https_url: str): + def track_network_read_hdf5_pynwb_remfile_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile with preloaded cache.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url @@ -322,21 +344,22 @@ class LindiCreateLocalJSONFileBenchmark(BaseBenchmark): Track the network activity during read of remote HDF5 files and the creation of a LINDI JSON file using lindi. """ - parameter_cases = lindi_hdf5_parameter_cases + params = lindi_hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.lindi_file = os.path.basename(https_url) + ".nwb.lindi.json" - self.teardown(https_url=https_url) + self.teardown(params) - def teardown(self, https_url: str): - """Delete the LINDI JSON file if it exists""" + def teardown(self, params: dict[str, str]): if os.path.exists(self.lindi_file): os.remove(self.lindi_file) # TODO This benchmark takes a long time to index all of the chunks for these files! Do not run until ready @skip_benchmark - def track_network_read_create_lindi_json(self, https_url: str): + def track_network_read_create_lindi_json(self, params: dict[str, str]): """Read a remote HDF5 file to create a LINDI JSON file.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: create_lindi_reference_file_system(https_url=https_url, outfile_path=self.lindi_file) return network_tracker.asv_network_statistics @@ -352,16 +375,16 @@ class LindiLocalJSONFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to be consistent with timing benchmarks. """ - parameter_cases = lindi_remote_rfs_parameter_cases + params = lindi_remote_rfs_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): """Download the LINDI JSON file.""" + https_url = params["https_url"] self.lindi_file = os.path.basename(https_url) + ".lindi.json" - self.teardown(https_url=https_url) + self.teardown(params) download_file(url=https_url, local_path=self.lindi_file) - def teardown(self, https_url: str): - """Delete the LINDI JSON file if it exists.""" + def teardown(self, params: dict[str, str]): if hasattr(self, "io"): self.io.close() if hasattr(self, "client"): @@ -370,14 +393,14 @@ def teardown(self, https_url: str): os.remove(self.lindi_file) @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_lindi_h5py(self, https_url: str): + def track_network_read_lindi_h5py(self, params: dict[str, str]): """Read a remote HDF5 file with h5py using lindi with the local LINDI JSON file.""" with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.client = read_hdf5_h5py_lindi(rfs=self.lindi_file) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_lindi_pynwb(self, https_url: str): + def track_network_read_lindi_pynwb(self, params: dict[str, str]): """Read a remote HDF5 NWB file with pynwb using lindi with the local LINDI JSON file.""" with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io, self.client = read_hdf5_pynwb_lindi(rfs=self.lindi_file) @@ -391,32 +414,36 @@ class ZarrZarrPythonFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = zarr_parameter_cases + params = zarr_params @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_https(self, https_url: str): + def track_network_read_zarr_https(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with HTTPS and consolidated metadata (if available).""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.zarr_file = read_zarr_zarrpython_https(https_url=https_url, open_without_consolidated_metadata=False) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_https_force_no_consolidated(self, https_url: str): + def track_network_read_zarr_https_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with HTTPS and without using consolidated metadata.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.zarr_file = read_zarr_zarrpython_https(https_url=https_url, open_without_consolidated_metadata=True) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_s3(self, https_url: str): + def track_network_read_zarr_s3(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with S3 and consolidated metadata (if available).""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.zarr_file = read_zarr_zarrpython_s3(https_url=https_url, open_without_consolidated_metadata=False) return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_s3_force_no_consolidated(self, https_url: str): + def track_network_read_zarr_s3_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with S3 and without using consolidated metadata.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.zarr_file = read_zarr_zarrpython_s3(https_url=https_url, open_without_consolidated_metadata=True) return network_tracker.asv_network_statistics @@ -429,22 +456,24 @@ class ZarrPyNWBFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to be consistent with timing benchmarks. """ - parameter_cases = zarr_parameter_cases + params = zarr_params - def teardown(self, https_url: str): + def teardown(self, params: dict[str, str]): if hasattr(self, "io"): self.io.close() @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_pynwb_s3(self, https_url: str): + def track_network_read_zarr_pynwb_s3(self, params: dict[str, str]): """Read a Zarr NWB file using pynwb with S3 and consolidated metadata (if available).""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r") return network_tracker.asv_network_statistics @skip_benchmark_if(TSHARK_PATH is None) - def track_network_read_zarr_pynwb_s3_force_no_consolidated(self, https_url: str): + def track_network_read_zarr_pynwb_s3_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr NWB file using pynwb using S3 and without consolidated metadata.""" + https_url = params["https_url"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r-") return network_tracker.asv_network_statistics diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py index 731e2c3..ed227d4 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py @@ -28,9 +28,9 @@ ) from .params_remote_slicing import ( - lindi_remote_rfs_parameter_cases, - parameter_cases, - zarr_parameter_cases, + hdf5_params, + lindi_remote_rfs_params, + zarr_params, ) @@ -42,7 +42,7 @@ class ContinuousSliceBenchmark(BaseBenchmark, ABC): """ @abstractmethod - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def setup(self, params: dict[str, str | Tuple[slice]]): """Set up the benchmark by loading the NWB file and preparing data for slicing. This method must be implemented by subclasses to define how to: @@ -52,7 +52,7 @@ def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): """ pass - def teardown(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def teardown(self, params: dict[str, str | Tuple[slice]]): if hasattr(self, "io"): self.io.close() if hasattr(self, "file"): @@ -66,8 +66,9 @@ def teardown(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.tmpdir.cleanup() @skip_benchmark_if(TSHARK_PATH is None) - def track_network_during_slice(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def track_network_during_slice(self, params: dict[str, str | Tuple[slice]]): """Slice a range of a dataset in a remote NWB file.""" + slice_range = params["slice_range"] with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self._temp = self.data_to_slice[slice_range] return network_tracker.asv_network_statistics @@ -79,9 +80,12 @@ class HDF5PyNWBFsspecHttpsNoCacheContinuousSliceBenchmark(ContinuousSliceBenchma cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -92,9 +96,12 @@ class HDF5PyNWBFsspecHttpsWithCacheContinuousSliceBenchmark(ContinuousSliceBench Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with HTTPS with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @@ -108,9 +115,13 @@ class HDF5PyNWBFsspecHttpsPreloadedNoCacheContinuousSliceBenchmark(ContinuousSli data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -123,9 +134,13 @@ class HDF5PyNWBFsspecHttpsPreloadedWithCacheContinuousSliceBenchmark(ContinuousS cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @@ -139,9 +154,12 @@ class HDF5PyNWBFsspecS3NoCacheContinuousSliceBenchmark(ContinuousSliceBenchmark) Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with S3 without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -152,9 +170,12 @@ class HDF5PyNWBFsspecS3WithCacheContinuousSliceBenchmark(ContinuousSliceBenchmar Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with S3 with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @@ -168,9 +189,13 @@ class HDF5PyNWBFsspecS3PreloadedNoCacheContinuousSliceBenchmark(ContinuousSliceB data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -183,9 +208,13 @@ class HDF5PyNWBFsspecS3PreloadedWithCacheContinuousSliceBenchmark(ContinuousSlic cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @@ -199,9 +228,12 @@ class HDF5PyNWBRemfileNoCacheContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -212,9 +244,12 @@ class HDF5PyNWBRemfileWithCacheContinuousSliceBenchmark(ContinuousSliceBenchmark Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @@ -228,9 +263,13 @@ class HDF5PyNWBRemfilePreloadedNoCacheContinuousSliceBenchmark(ContinuousSliceBe data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -242,9 +281,13 @@ class HDF5PyNWBRemfilePreloadedWithCacheContinuousSliceBenchmark(ContinuousSlice Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @@ -258,9 +301,12 @@ class HDF5PyNWBROS3ContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and the ROS3 driver. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, _ = read_hdf5_pynwb_ros3(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -277,9 +323,12 @@ class LindiLocalJSONContinuousSliceBenchmark(ContinuousSliceBenchmark): file because, in that case, the first thing that LINDI does is download the remote file to a temporary directory. """ - parameter_cases = lindi_remote_rfs_parameter_cases + params = lindi_remote_rfs_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.client = read_hdf5_pynwb_lindi(rfs=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -290,9 +339,12 @@ class ZarrPyNWBS3ContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote Zarr NWB files using pynwb with S3. """ - parameter_cases = zarr_parameter_cases + params = zarr_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r") self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -304,9 +356,12 @@ class ZarrPyNWBS3ForceNoConsolidatedContinuousSliceBenchmark(ContinuousSliceBenc consolidated metadata. """ - parameter_cases = zarr_parameter_cases + params = zarr_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r-") self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data diff --git a/src/nwb_benchmarks/benchmarks/params_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/params_remote_file_reading.py index 52d56dc..902eecb 100644 --- a/src/nwb_benchmarks/benchmarks/params_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/params_remote_file_reading.py @@ -1,63 +1,71 @@ from nwb_benchmarks.core import get_https_url -parameter_cases = dict( - EcephysTestCase=dict( +hdf5_params = ( + dict( + name="EcephysTestCase", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", ), ), - OphysTestCase=dict( + dict( + name="OphysTestCase", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", ), ), - IcephysTestCase=dict( + dict( + name="IcephysTestCase", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", ), ), - # IBLTestCase1=dict( + # dict( + # name="EcephysTestCaseMock", # https_url=get_https_url(dandiset_id="000717", dandi_path="sub-mock/sub-mock_ses-ecephys1.nwb"), # ), # IBLTestCase2 is not the best example for testing a theory about file read; should probably replace with simpler - # IBLTestCase2=dict( + # dict( + # name="EcephysTestCaseIBL", # https_url=get_https_url( # dandiset_id="000717", # dandi_path="sub-IBL-ecephys/sub-IBL-ecephys_ses-3e7ae7c0_desc-18000000-frames-13653-by-384-chunking.nwb", # ), # ), - # ClassicRos3TestCase=dict(https_url="https://dandiarchive.s3.amazonaws.com/ros3test.nwb"), ) # Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on # the remote server (i.e., we create the LINDI JSON file for these in these tests) -lindi_hdf5_parameter_cases = parameter_cases +lindi_hdf5_params = hdf5_params # Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not # to create the JSON but can load it directly from the remote store -lindi_remote_rfs_parameter_cases = dict( - EcephysTestCase=dict( +lindi_remote_rfs_params = ( + dict( + name="EcephysTestCase", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", ), ), - OphysTestCase=dict( + dict( + name="OphysTestCase", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", ), ), - IcephysTestCase=dict( + dict( + name="IcephysTestCase", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", ), ), - # EcephysTestCase=dict( + # dict( + # name="EcephysTestCaseIBL", # https_url=get_https_url( # dandiset_id="213889", # dandi_path="sub-IBL-ecephys/sub-IBL-ecephys_ses-3e7ae7c0_desc-18000000-frames-13653-by-384-chunking.lindi.json", @@ -66,20 +74,24 @@ ) # TODO Test non-consolidated metadata vs consolidated metadata -zarr_parameter_cases = dict( - EcephysTestCase=dict( +zarr_params = ( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", ), - OphysTestCase=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", ), - IcephysTestCase=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", ), - # AINDTestCase=dict( + # dict( + # name="EcephysTestCaseAIND", # https_url=( # "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/" # "ecephys_625749_2022-08-03_15-15-06_nwb/" diff --git a/src/nwb_benchmarks/benchmarks/params_remote_slicing.py b/src/nwb_benchmarks/benchmarks/params_remote_slicing.py index f02ca56..78a55ea 100644 --- a/src/nwb_benchmarks/benchmarks/params_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/params_remote_slicing.py @@ -1,7 +1,8 @@ from nwb_benchmarks.core import get_https_url -parameter_cases = dict( - EcephysTestCase1=dict( +hdf5_params = ( + dict( + name="EcephysTestCase1", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", @@ -9,7 +10,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144), slice(0, 384)), # 12 chunks ), - EcephysTestCase2=dict( + dict( + name="EcephysTestCase2", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", @@ -17,7 +19,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 2), slice(0, 384)), # 24 chunks ), - EcephysTestCase3=dict( + dict( + name="EcephysTestCase3", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", @@ -25,7 +28,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 3), slice(0, 384)), # 36 chunks ), - EcephysTestCase4=dict( + dict( + name="EcephysTestCase4", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", @@ -33,7 +37,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 4), slice(0, 384)), # 48 chunks ), - EcephysTestCase5=dict( + dict( + name="EcephysTestCase5", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb", @@ -41,7 +46,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 5), slice(0, 384)), # 60 chunks ), - OphysTestCase1=dict( + dict( + name="OphysTestCase1", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", @@ -49,7 +55,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20), slice(0, 796), slice(0, 512)), # 1 chunk ), - OphysTestCase2=dict( + dict( + name="OphysTestCase2", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", @@ -57,7 +64,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 2), slice(0, 796), slice(0, 512)), # 2 chunks ), - OphysTestCase3=dict( + dict( + name="OphysTestCase3", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", @@ -65,7 +73,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 3), slice(0, 796), slice(0, 512)), # 3 chunks ), - OphysTestCase4=dict( + dict( + name="OphysTestCase4", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", @@ -73,7 +82,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 4), slice(0, 796), slice(0, 512)), # 4 chunks ), - OphysTestCase5=dict( + dict( + name="OphysTestCase5", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.nwb", @@ -81,7 +91,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 5), slice(0, 796), slice(0, 512)), # 5 chunks ), - IcephysTestCase1=dict( + dict( + name="IcephysTestCase1", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", @@ -89,7 +100,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920),), # 10 chunks ), - IcephysTestCase2=dict( + dict( + name="IcephysTestCase2", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", @@ -97,7 +109,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 2),), # 20 chunks ), - IcephysTestCase3=dict( + dict( + name="IcephysTestCase3", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", @@ -105,7 +118,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 3),), # 30 chunks ), - IcephysTestCase4=dict( + dict( + name="IcephysTestCase4", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", @@ -113,7 +127,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 4),), # 40 chunks ), - IcephysTestCase5=dict( + dict( + name="IcephysTestCase5", https_url=get_https_url( dandiset_id="000717", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.nwb", @@ -121,7 +136,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 5),), # 50 chunks ), - # EcephysTestCase=dict( + # dict( + # name="EcephysTestCaseIBL", # https_url=get_https_url( # dandiset_id="000717", # dandi_path="sub-IBL-ecephys/sub-IBL-ecephys_ses-3e7ae7c0_desc-18000000-frames-13653-by-384-chunking.nwb", @@ -132,8 +148,9 @@ ) # Parameters for LINDI pointing to a remote LINDI reference file system JSON file -lindi_remote_rfs_parameter_cases = dict( - EcephysTestCase1=dict( +lindi_remote_rfs_params = ( + dict( + name="EcephysTestCase1", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", @@ -141,7 +158,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144), slice(0, 384)), # 12 chunks ), - EcephysTestCase2=dict( + dict( + name="EcephysTestCase2", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", @@ -149,7 +167,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 2), slice(0, 384)), # 24 chunks ), - EcephysTestCase3=dict( + dict( + name="EcephysTestCase3", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", @@ -157,7 +176,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 3), slice(0, 384)), # 36 chunks ), - EcephysTestCase4=dict( + dict( + name="EcephysTestCase4", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", @@ -165,7 +185,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 4), slice(0, 384)), # 48 chunks ), - EcephysTestCase5=dict( + dict( + name="EcephysTestCase5", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json", @@ -173,7 +194,8 @@ object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 5), slice(0, 384)), # 60 chunks ), - OphysTestCase1=dict( + dict( + name="OphysTestCase1", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", @@ -181,7 +203,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20), slice(0, 796), slice(0, 512)), # 1 chunk ), - OphysTestCase2=dict( + dict( + name="OphysTestCase2", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", @@ -189,7 +212,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 2), slice(0, 796), slice(0, 512)), # 2 chunks ), - OphysTestCase3=dict( + dict( + name="OphysTestCase3", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", @@ -197,7 +221,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 3), slice(0, 796), slice(0, 512)), # 3 chunks ), - OphysTestCase4=dict( + dict( + name="OphysTestCase4", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", @@ -205,7 +230,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 4), slice(0, 796), slice(0, 512)), # 4 chunks ), - OphysTestCase5=dict( + dict( + name="OphysTestCase5", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json", @@ -213,7 +239,8 @@ object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 5), slice(0, 796), slice(0, 512)), # 5 chunks ), - IcephysTestCase1=dict( + dict( + name="IcephysTestCase1", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", @@ -221,7 +248,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920),), # 10 chunks ), - IcephysTestCase2=dict( + dict( + name="IcephysTestCase2", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", @@ -229,7 +257,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 2),), # 20 chunks ), - IcephysTestCase3=dict( + dict( + name="IcephysTestCase3", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", @@ -237,7 +266,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 3),), # 30 chunks ), - IcephysTestCase4=dict( + dict( + name="IcephysTestCase4", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", @@ -245,7 +275,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 4),), # 40 chunks ), - IcephysTestCase5=dict( + dict( + name="IcephysTestCase5", https_url=get_https_url( dandiset_id="213889", dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json", @@ -253,7 +284,8 @@ object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 5),), # 50 chunks ), - # EcephysTestCase=dict( + # dict( + # name="EcephysTestCaseIBL", # https_url=get_https_url( # dandiset_id="213889", # dandi_path="sub-IBL-ecephys/sub-IBL-ecephys_ses-3e7ae7c0_desc-18000000-frames-13653-by-384-chunking.lindi.json", @@ -264,98 +296,114 @@ ) # TODO Test non-consolidated metadata vs consolidated metadata -zarr_parameter_cases = dict( - EcephysTestCase1=dict( +zarr_params = ( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase1", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", object_name="ElectricalSeries", slice_range=(slice(0, 262_144), slice(0, 384)), # 12 chunks ), - EcephysTestCase2=dict( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase2", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 2), slice(0, 384)), # 24 chunks ), - EcephysTestCase3=dict( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase3", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 3), slice(0, 384)), # 36 chunks ), - EcephysTestCase4=dict( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase4", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 4), slice(0, 384)), # 48 chunks ), - EcephysTestCase5=dict( + dict( # DANDI: 000719 sub-npI3_ses-20190421_behavior+ecephys_rechunk.nwb.zarr + name="EcephysTestCase5", https_url="https://dandiarchive.s3.amazonaws.com/zarr/d097af6b-8fd8-4d83-b649-fc6518e95d25/", object_name="ElectricalSeries", slice_range=(slice(0, 262_144 * 5), slice(0, 384)), # 60 chunks ), - OphysTestCase1=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase1", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", object_name="TwoPhotonSeries", slice_range=(slice(0, 20), slice(0, 796), slice(0, 512)), # 1 chunk ), - OphysTestCase2=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase2", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 2), slice(0, 796), slice(0, 512)), # 2 chunks ), - OphysTestCase3=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase3", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 3), slice(0, 796), slice(0, 512)), # 3 chunks ), - OphysTestCase4=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase4", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 4), slice(0, 796), slice(0, 512)), # 4 chunks ), - OphysTestCase5=dict( + dict( # DANDI: 000719 sub-R6_ses-20200206T210000_behavior+ophys_DirectoryStore_rechunked.nwb.zarr + name="OphysTestCase5", https_url="https://dandiarchive.s3.amazonaws.com/zarr/c8c6b848-fbc6-4f58-85ff-e3f2618ee983/", object_name="TwoPhotonSeries", slice_range=(slice(0, 20 * 5), slice(0, 796), slice(0, 512)), # 5 chunks ), - IcephysTestCase1=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase1", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", object_name="data_00002_AD0", slice_range=(slice(0, 81_920),), # 10 chunks ), - IcephysTestCase2=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase2", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 2),), # 20 chunks ), - IcephysTestCase3=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase3", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 3),), # 30 chunks ), - IcephysTestCase4=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase4", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 4),), # 40 chunks ), - IcephysTestCase5=dict( + dict( # DANDI: 000719 icephys_DS_11_21_24/sub-1214579789_ses-1214621812_icephys_DirectoryStore.nwb.zarr + name="IcephysTestCase5", https_url="https://dandiarchive.s3.amazonaws.com/zarr/18e75d22-f527-4051-a4c8-c7e0f1e7dad1/", object_name="data_00002_AD0", slice_range=(slice(0, 81_920 * 5),), # 50 chunks ), - # AINDTestCase=dict( + # dict( + # name="EcephysTestCaseIBL", # https_url=( # "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/" # "ecephys_625749_2022-08-03_15-15-06_nwb/" diff --git a/src/nwb_benchmarks/benchmarks/time_download_lindi.py b/src/nwb_benchmarks/benchmarks/time_download_lindi.py index a5db8dc..a41a730 100644 --- a/src/nwb_benchmarks/benchmarks/time_download_lindi.py +++ b/src/nwb_benchmarks/benchmarks/time_download_lindi.py @@ -4,7 +4,7 @@ from nwb_benchmarks.core import BaseBenchmark, download_file -from .params_remote_file_reading import lindi_remote_rfs_parameter_cases +from .params_remote_file_reading import lindi_remote_rfs_params class LindiDownloadBenchmark(BaseBenchmark): @@ -12,15 +12,17 @@ class LindiDownloadBenchmark(BaseBenchmark): Time the download of a remote LINDI JSON file. """ - parameter_cases = lindi_remote_rfs_parameter_cases + params = lindi_remote_rfs_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.lindi_file = os.path.basename(https_url) + ".lindi.json" - self.teardown(https_url=https_url) + self.teardown(params) - def teardown(self, https_url: str): + def teardown(self, params: dict[str, str]): if os.path.exists(self.lindi_file): os.remove(self.lindi_file) - def time_download(self, https_url: str): + def time_download(self, params: dict[str, str]): + https_url = params["https_url"] download_file(url=https_url, local_path=self.lindi_file) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index 56fd339..23f225f 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -31,10 +31,10 @@ ) from .params_remote_file_reading import ( - lindi_hdf5_parameter_cases, - lindi_remote_rfs_parameter_cases, - parameter_cases, - zarr_parameter_cases, + hdf5_params, + lindi_hdf5_params, + lindi_remote_rfs_params, + zarr_params, ) @@ -47,10 +47,9 @@ class HDF5H5pyFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = parameter_cases + params = hdf5_params - def teardown(self, https_url: str): - # Not all tests in the class are using a temporary dir as cache. Clean up if it does. + def teardown(self, params: dict[str, str]): if hasattr(self, "file"): self.file.close() if hasattr(self, "bytestream"): @@ -59,32 +58,39 @@ def teardown(self, https_url: str): shutil.rmtree(path=self.tmpdir.name, ignore_errors=True) self.tmpdir.cleanup() - def time_read_hdf5_h5py_fsspec_https_no_cache(self, https_url: str): + def time_read_hdf5_h5py_fsspec_https_no_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and fsspec with HTTPS without cache.""" + https_url = params["https_url"] self.file, self.bytestream = read_hdf5_h5py_fsspec_https_no_cache(https_url=https_url) - def time_read_hdf5_h5py_fsspec_https_with_cache(self, https_url: str): + def time_read_hdf5_h5py_fsspec_https_with_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and fsspec with HTTPS with cache.""" + https_url = params["https_url"] self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_fsspec_https_with_cache(https_url=https_url) - def time_read_hdf5_h5py_fsspec_s3_no_cache(self, https_url: str): + def time_read_hdf5_h5py_fsspec_s3_no_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and fsspec with S3 without cache.""" + https_url = params["https_url"] self.file, self.bytestream = read_hdf5_h5py_fsspec_s3_no_cache(https_url=https_url) - def time_read_hdf5_h5py_fsspec_s3_with_cache(self, https_url: str): + def time_read_hdf5_h5py_fsspec_s3_with_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and fsspec with S3 with cache.""" + https_url = params["https_url"] self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_fsspec_s3_with_cache(https_url=https_url) - def time_read_hdf5_h5py_remfile_no_cache(self, https_url: str): + def time_read_hdf5_h5py_remfile_no_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and remfile without cache.""" + https_url = params["https_url"] self.file, self.bytestream = read_hdf5_h5py_remfile_no_cache(https_url=https_url) - def time_read_hdf5_h5py_remfile_with_cache(self, https_url: str): + def time_read_hdf5_h5py_remfile_with_cache(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and remfile with cache.""" + https_url = params["https_url"] self.file, self.bytestream, self.tmpdir = read_hdf5_h5py_remfile_with_cache(https_url=https_url) - def time_read_hdf5_h5py_ros3(self, https_url: str): + def time_read_hdf5_h5py_ros3(self, params: dict[str, str]): """Read a remote HDF5 file using h5py and the ROS3 HDF5 driver.""" + https_url = params["https_url"] self.file, _ = read_hdf5_h5py_ros3(https_url=https_url) @@ -95,10 +101,9 @@ class HDF5PyNWBFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = parameter_cases + params = hdf5_params - def teardown(self, https_url: str): - # Not all tests in the class are using a temporary dir as cache. Clean up if it does. + def teardown(self, params: dict[str, str]): if hasattr(self, "file"): self.file.close() if hasattr(self, "bytestream"): @@ -107,38 +112,45 @@ def teardown(self, https_url: str): shutil.rmtree(path=self.tmpdir.name, ignore_errors=True) self.tmpdir.cleanup() - def time_read_hdf5_pynwb_fsspec_https_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_https_no_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and fsspec with HTTPS without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_fsspec_https_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_https_with_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and fsspec with HTTPS with cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_fsspec_s3_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_s3_no_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and fsspec with S3 without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_fsspec_s3_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_s3_with_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and fsspec with S3 with cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_remfile_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_remfile_no_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and remfile without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_remfile_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_remfile_with_cache(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and remfile with cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_ros3(self, https_url: str): + def time_read_hdf5_pynwb_ros3(self, params: dict[str, str]): """Read a remote HDF5 NWB file using pynwb and the ROS3 HDF5 driver.""" + https_url = params["https_url"] self.nwbfile, self.io, _ = read_hdf5_pynwb_ros3(https_url=https_url) @@ -147,13 +159,15 @@ class HDF5PyNWBFsspecHttpsPreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with HTTPS with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_fsspec_https_preloaded_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_https_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS with preloaded data without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) @@ -162,15 +176,17 @@ class HDF5PyNWBFsspecHttpsPreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with HTTPS with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_fsspec_https_preloaded_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_https_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with HTTPS with preloaded cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @@ -181,13 +197,15 @@ class HDF5PyNWBFsspecS3PreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with S3 with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_fsspec_s3_preloaded_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_s3_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 with preloaded data without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) @@ -196,15 +214,17 @@ class HDF5PyNWBFsspecS3PreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and fsspec with S3 with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_fsspec_s3_preloaded_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_fsspec_s3_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and fsspec with S3 with preloaded cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @@ -215,13 +235,15 @@ class HDF5PyNWBRemfilePreloadedNoCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and remfile with preloaded data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) - def time_read_hdf5_pynwb_remfile_preloaded_no_cache(self, https_url: str): + def time_read_hdf5_pynwb_remfile_preloaded_no_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile with preloaded data without cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) @@ -230,15 +252,17 @@ class HDF5PyNWBRemfilePreloadedWithCacheFileReadBenchmark(BaseBenchmark): Time the read of remote HDF5 NWB files using pynwb and remfile with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) - def time_read_hdf5_pynwb_remfile_preloaded_with_cache(self, https_url: str): + def time_read_hdf5_pynwb_remfile_preloaded_with_cache(self, params: dict[str, str]): """Read remote NWB file using pynwb and remfile with preloaded cache.""" + https_url = params["https_url"] self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @@ -249,21 +273,22 @@ class LindiCreateLocalJSONFileBenchmark(BaseBenchmark): Time the read of remote HDF5 files and the creation of a LINDI JSON file using lindi. """ - parameter_cases = lindi_hdf5_parameter_cases + params = lindi_hdf5_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): + https_url = params["https_url"] self.lindi_file = os.path.basename(https_url) + ".nwb.lindi.json" - self.teardown(https_url=https_url) + self.teardown(params) - def teardown(self, https_url: str): - """Delete the LINDI JSON file if it exists""" + def teardown(self, params: dict[str, str]): if os.path.exists(self.lindi_file): os.remove(self.lindi_file) # TODO This benchmark takes a long time to index all of the chunks for these files! Do not run until ready @skip_benchmark - def time_read_create_lindi_json(self, https_url: str): + def time_read_create_lindi_json(self, params: dict[str, str]): """Read a remote HDF5 file to create a LINDI JSON file.""" + https_url = params["https_url"] create_lindi_reference_file_system(https_url=https_url, outfile_path=self.lindi_file) @@ -276,16 +301,16 @@ class LindiLocalJSONFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = lindi_remote_rfs_parameter_cases + params = lindi_remote_rfs_params - def setup(self, https_url: str): + def setup(self, params: dict[str, str]): """Download the LINDI JSON file.""" + https_url = params["https_url"] self.lindi_file = os.path.basename(https_url) + ".lindi.json" - self.teardown(https_url=https_url) + self.teardown(params) download_file(url=https_url, local_path=self.lindi_file) - def teardown(self, https_url: str): - """Delete the LINDI JSON file if it exists.""" + def teardown(self, params: dict[str, str]): if hasattr(self, "io"): self.io.close() if hasattr(self, "client"): @@ -293,11 +318,11 @@ def teardown(self, https_url: str): if os.path.exists(self.lindi_file): os.remove(self.lindi_file) - def time_read_lindi_h5py(self, https_url: str): + def time_read_lindi_h5py(self, params: dict[str, str]): """Read a remote HDF5 file with h5py using lindi with the local LINDI JSON file.""" self.client = read_hdf5_h5py_lindi(rfs=self.lindi_file) - def time_read_lindi_pynwb(self, https_url: str): + def time_read_lindi_pynwb(self, params: dict[str, str]): """Read a remote HDF5 NWB file with pynwb using lindi with the local LINDI JSON file.""" self.nwbfile, self.io, self.client = read_hdf5_pynwb_lindi(rfs=self.lindi_file) @@ -309,22 +334,26 @@ class ZarrZarrPythonFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = zarr_parameter_cases + params = zarr_params - def time_read_zarr_https(self, https_url: str): + def time_read_zarr_https(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with HTTPS and consolidated metadata (if available).""" + https_url = params["https_url"] self.zarr_file = read_zarr_zarrpython_https(https_url=https_url, open_without_consolidated_metadata=False) - def time_read_zarr_https_force_no_consolidated(self, https_url: str): + def time_read_zarr_https_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with HTTPS and without using consolidated metadata.""" + https_url = params["https_url"] self.zarr_file = read_zarr_zarrpython_https(https_url=https_url, open_without_consolidated_metadata=True) - def time_read_zarr_s3(self, https_url: str): + def time_read_zarr_s3(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with S3 and consolidated metadata (if available).""" + https_url = params["https_url"] self.zarr_file = read_zarr_zarrpython_s3(https_url=https_url, open_without_consolidated_metadata=False) - def time_read_zarr_s3_force_no_consolidated(self, https_url: str): + def time_read_zarr_s3_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr file using Zarr-Python with S3 and without using consolidated metadata.""" + https_url = params["https_url"] self.zarr_file = read_zarr_zarrpython_s3(https_url=https_url, open_without_consolidated_metadata=True) @@ -335,16 +364,18 @@ class ZarrPyNWBFileReadBenchmark(BaseBenchmark): Note: in all cases, store the in-memory objects to avoid timing garbage collection steps. """ - parameter_cases = zarr_parameter_cases + params = zarr_params - def teardown(self, https_url: str): + def teardown(self, params: dict[str, str]): if hasattr(self, "io"): self.io.close() - def time_read_zarr_pynwb_s3(self, https_url: str): + def time_read_zarr_pynwb_s3(self, params: dict[str, str]): """Read a Zarr NWB file using pynwb with S3 and consolidated metadata (if available).""" + https_url = params["https_url"] self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r") - def time_read_zarr_pynwb_s3_force_no_consolidated(self, https_url: str): + def time_read_zarr_pynwb_s3_force_no_consolidated(self, params: dict[str, str]): """Read a Zarr NWB file using pynwb using S3 and without consolidated metadata.""" + https_url = params["https_url"] self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r-") diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index 49965f8..8a0a01d 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -19,9 +19,9 @@ ) from .params_remote_slicing import ( - lindi_remote_rfs_parameter_cases, - parameter_cases, - zarr_parameter_cases, + hdf5_params, + lindi_remote_rfs_params, + zarr_params, ) @@ -33,7 +33,7 @@ class ContinuousSliceBenchmark(BaseBenchmark, ABC): """ @abstractmethod - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def setup(self, params: dict[str, str | Tuple[slice]]): """Set up the benchmark by loading the NWB file and preparing data for slicing. This method must be implemented by subclasses to define how to: @@ -43,7 +43,7 @@ def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): """ pass - def teardown(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def teardown(self, params: dict[str, str | Tuple[slice]]): if hasattr(self, "io"): self.io.close() if hasattr(self, "file"): @@ -56,8 +56,9 @@ def teardown(self, https_url: str, object_name: str, slice_range: Tuple[slice]): shutil.rmtree(path=self.tmpdir.name, ignore_errors=True) self.tmpdir.cleanup() - def time_slice(self, https_url: str, object_name: str, slice_range: Tuple[slice]): + def time_slice(self, params: dict[str, str | Tuple[slice]]): """Slice a range of a dataset in a remote NWB file.""" + slice_range = params["slice_range"] self._temp = self.data_to_slice[slice_range] @@ -67,9 +68,12 @@ class HDF5PyNWBFsspecHttpsNoCacheContinuousSliceBenchmark(ContinuousSliceBenchma cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -80,9 +84,12 @@ class HDF5PyNWBFsspecHttpsWithCacheContinuousSliceBenchmark(ContinuousSliceBench Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with HTTPS with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @@ -96,9 +103,13 @@ class HDF5PyNWBFsspecHttpsPreloadedNoCacheContinuousSliceBenchmark(ContinuousSli data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_https_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -111,9 +122,13 @@ class HDF5PyNWBFsspecHttpsPreloadedWithCacheContinuousSliceBenchmark(ContinuousS cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_https_with_cache( https_url=https_url ) @@ -127,9 +142,12 @@ class HDF5PyNWBFsspecS3NoCacheContinuousSliceBenchmark(ContinuousSliceBenchmark) Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with S3 without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -140,9 +158,12 @@ class HDF5PyNWBFsspecS3WithCacheContinuousSliceBenchmark(ContinuousSliceBenchmar Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and fsspec with S3 with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @@ -156,9 +177,13 @@ class HDF5PyNWBFsspecS3PreloadedNoCacheContinuousSliceBenchmark(ContinuousSliceB data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_fsspec_s3_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -171,9 +196,13 @@ class HDF5PyNWBFsspecS3PreloadedWithCacheContinuousSliceBenchmark(ContinuousSlic cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_fsspec_s3_with_cache( https_url=https_url ) @@ -187,9 +216,12 @@ class HDF5PyNWBRemfileNoCacheContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -200,9 +232,12 @@ class HDF5PyNWBRemfileWithCacheContinuousSliceBenchmark(ContinuousSliceBenchmark Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile with cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @@ -216,9 +251,13 @@ class HDF5PyNWBRemfilePreloadedNoCacheContinuousSliceBenchmark(ContinuousSliceBe data without cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream = read_hdf5_pynwb_remfile_no_cache(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -230,9 +269,13 @@ class HDF5PyNWBRemfilePreloadedWithCacheContinuousSliceBenchmark(ContinuousSlice Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and remfile with preloaded cache. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] + slice_range = params["slice_range"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.file, self.bytestream, self.tmpdir = read_hdf5_pynwb_remfile_with_cache( https_url=https_url ) @@ -246,9 +289,12 @@ class HDF5PyNWBROS3ContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote HDF5 NWB files using pynwb and the ROS3 driver. """ - parameter_cases = parameter_cases + params = hdf5_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, _ = read_hdf5_pynwb_ros3(https_url=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -265,9 +311,12 @@ class LindiLocalJSONContinuousSliceBenchmark(ContinuousSliceBenchmark): file because, in that case, the first thing that LINDI does is download the remote file to a temporary directory. """ - parameter_cases = lindi_remote_rfs_parameter_cases + params = lindi_remote_rfs_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.client = read_hdf5_pynwb_lindi(rfs=https_url) self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -278,9 +327,12 @@ class ZarrPyNWBS3ContinuousSliceBenchmark(ContinuousSliceBenchmark): Time the read of a continuous data slice from remote Zarr NWB files using pynwb with S3. """ - parameter_cases = zarr_parameter_cases + params = zarr_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r") self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data @@ -292,9 +344,12 @@ class ZarrPyNWBS3ForceNoConsolidatedContinuousSliceBenchmark(ContinuousSliceBenc consolidated metadata. """ - parameter_cases = zarr_parameter_cases + params = zarr_params + + def setup(self, params: dict[str, str | Tuple[slice]]): + https_url = params["https_url"] + object_name = params["object_name"] - def setup(self, https_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io = read_zarr_pynwb_s3(https_url=https_url, mode="r-") self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data diff --git a/src/nwb_benchmarks/command_line_interface.py b/src/nwb_benchmarks/command_line_interface.py index c56e3d1..f7fc03b 100644 --- a/src/nwb_benchmarks/command_line_interface.py +++ b/src/nwb_benchmarks/command_line_interface.py @@ -121,14 +121,13 @@ def main() -> None: ), f"A single intermediate result was not found in {intermediate_results_folder}. Please raise an issue." raw_results_file_path = globbed_json_file_paths[0] - if debug_mode: - raw_results_file_path.unlink() - else: - reduce_results( - machine_id=machine_id, - raw_results_file_path=raw_results_file_path, - raw_environment_info_file_path=raw_environment_info_file_path, - ) + reduce_results( + machine_id=machine_id, + raw_results_file_path=raw_results_file_path, + raw_environment_info_file_path=raw_environment_info_file_path, + ) + + if not debug_mode: upload_results() finally: clean_cache() diff --git a/src/nwb_benchmarks/core/_base_benchmark.py b/src/nwb_benchmarks/core/_base_benchmark.py index c1ef253..c2acc47 100644 --- a/src/nwb_benchmarks/core/_base_benchmark.py +++ b/src/nwb_benchmarks/core/_base_benchmark.py @@ -1,68 +1,6 @@ -import itertools -from typing import Any, Dict, Self, Union - -import asv_runner - - class BaseBenchmark: - """ - The ASV convention is to specify parametrized fields as a pair of `param_names` and `params`. - - This was deemed less readable than a single dictionary of parameter cases. - - So inheriting from a `BaseBenchmark` allows us to specify parameter cases as the form... - - parameter_cases = dict( - ParameterCase1=dict( - parameter1=value1, - parameter2=value2, - ... - ), - ParameterCase2=dict( - parameter1=value3, - parameter2=value4, - ... - ), - ... - ) - - ...and this will be unpacked into the expected form by ASV. - """ + """Base class for NWB benchmarks.""" rounds = 1 repeat = 1 - parameter_cases: Union[Dict[str, Dict[str, Any]], None] = None - - def __new__(cls, *args, **kwargs) -> Self: - instance = super().__new__(cls) - - # Unpack parameter cases dictionary into ASV expected form - if cls.parameter_cases is not None: - cls.param_names = list(next(iter(cls.parameter_cases.values())).keys()) - cls.params = [ - [parameter_case[param_name] for parameter_case in cls.parameter_cases.values()] - for param_name in cls.param_names - ] - - # ASV automatically forms a cartesian product over all params - # But we want our `parameter_names` usage to be flat in order to be more explicit - # So use the skip decorator to exclude the 'off-diagonal' parts of a square product - # These will still show up in the console display table, but will have n/a as display value - # And values of samples in intermediate results will be saved to JSON as `null` - cartesian_params = itertools.product(*cls.params) - desired_params = [ - tuple(parameter_case[parameter_name] for parameter_name in cls.param_names) - for parameter_case in cls.parameter_cases.values() - ] - non_cartesian_exclusion = [ - cartesian_param for cartesian_param in cartesian_params if cartesian_param not in desired_params - ] - - for attr_name in dir(cls): - attr = getattr(cls, attr_name) - if callable(attr) and any( - attr_name.startswith(prefix) for prefix in ["time_", "track_"] - ): # Add more when needed - setattr(cls, attr_name, asv_runner.benchmarks.mark.skip_for_params(non_cartesian_exclusion)(attr)) - - return instance + warmup_time = 0.0 diff --git a/src/nwb_benchmarks/globals.py b/src/nwb_benchmarks/globals.py index 98b2610..f505b95 100644 --- a/src/nwb_benchmarks/globals.py +++ b/src/nwb_benchmarks/globals.py @@ -3,7 +3,7 @@ from .setup import get_benchmarks_home_directory MACHINE_FILE_VERSION = "1.4.1" -DATABASE_VERSION = "2.2.0" +DATABASE_VERSION = "3.0.0" HOME_DIR = get_benchmarks_home_directory() RESULTS_DIR = HOME_DIR / "results" diff --git a/src/nwb_benchmarks/setup/_configure_machine.py b/src/nwb_benchmarks/setup/_configure_machine.py index be06ec5..35e8b8a 100644 --- a/src/nwb_benchmarks/setup/_configure_machine.py +++ b/src/nwb_benchmarks/setup/_configure_machine.py @@ -64,6 +64,8 @@ def collect_machine_info() -> Dict[str, Dict[str, Any]]: pass except Exception as exception: raise exception + if len(machine_info["cuda"]) == 0: + del machine_info["cuda"] # Config info config = read_config() @@ -79,22 +81,22 @@ def collect_machine_info() -> Dict[str, Dict[str, Any]]: if default_asv_machine_file_path.exists(): with open(file=default_asv_machine_file_path, mode="r") as file_stream: asv_machine_info = json.load(fp=file_stream) - machine_info["asv"] = asv_machine_info - - # Some info in ASV may be considered 'private' - if len(asv_machine_info.keys()) != 2: - message = ( - f"\nThe ASV machine file at {default_asv_machine_file_path} should only contain two keys: " - "'version' and the machine name. " - f"Found {len(asv_machine_info.keys())} keys: {list(asv_machine_info.keys())}' " - "Please raise an issue at https://github.com/NeurodataWithoutBorders/nwb_benchmarks/issues/new to report." - ) - raise ValueError(message) - - asv_machine_key = next(key for key in asv_machine_info.keys() if key != "version") - asv_machine_details = asv_machine_info[asv_machine_key] - del asv_machine_details["machine"] - machine_info["asv"] = asv_machine_details + machine_info["asv"] = asv_machine_info + + # Some info in ASV may be considered 'private' + if len(asv_machine_info.keys()) != 2: + message = ( + f"\nThe ASV machine file at {default_asv_machine_file_path} should only contain two keys: " + "'version' and the machine name. " + f"Found {len(asv_machine_info.keys())} keys: {list(asv_machine_info.keys())}' " + "Please raise an issue at https://github.com/NeurodataWithoutBorders/nwb_benchmarks/issues/new to report." + ) + raise ValueError(message) + + asv_machine_key = next(key for key in asv_machine_info.keys() if key != "version") + asv_machine_details = asv_machine_info[asv_machine_key] + del asv_machine_details["machine"] + machine_info["asv"] = asv_machine_details return machine_info diff --git a/src/nwb_benchmarks/setup/_reduce_results.py b/src/nwb_benchmarks/setup/_reduce_results.py index cf70f9e..b0aae4f 100644 --- a/src/nwb_benchmarks/setup/_reduce_results.py +++ b/src/nwb_benchmarks/setup/_reduce_results.py @@ -15,7 +15,7 @@ from ..utils import get_dictionary_checksum -def _parse_environment_info(raw_environment_info: List[str]) -> Dict[str, List[[Dict[str, str]]]]: +def _parse_environment_info(raw_environment_info: List[str]) -> Dict[str, List[Dict[str, str]]]: """Turn the results of `conda list` printout to a JSON dictionary.""" header_stripped = raw_environment_info[3:] newline_stripped = [line.rstrip("\n") for line in header_stripped] @@ -48,32 +48,23 @@ def reduce_results(machine_id: str, raw_results_file_path: pathlib.Path, raw_env if len(raw_results_list) != 12: continue - flattened_joint_params = collections.defaultdict(list) - for parameter_names in raw_results_list[1]: - for parameter_value_index, parameter_value in enumerate(parameter_names): - flattened_joint_params[parameter_value_index].append(parameter_value) - serialized_flattened_joint_params = [ - str(tuple(joint_params)) for joint_params in flattened_joint_params.values() - ] + # This code assumes that test cases are only run with one parameter + assert len(raw_results_list[1]) == 1, "Unexpected length of serialized parameters list!" + serialized_params = raw_results_list[1][0] # Skipped results in JSON are writen as `null` and read back into Python as `None` non_skipped_results = [result for result in raw_results_list[11] if result is not None] - if len(serialized_flattened_joint_params) != len(non_skipped_results): + if len(serialized_params) != len(non_skipped_results): message = ( f"In intermediate results for test case {test_case}: \n" - f"\tLength mismatch between flattened joint parameters ({len(serialized_flattened_joint_params)}) and " + f"\tLength mismatch between parameters ({len(serialized_params)}) and " f"result samples ({len(non_skipped_results)})!\n\n" "Please raise an issue and share your intermediate results file." ) warnings.warn(message=message) else: reduced_results.update( - { - test_case: { - joint_params: raw_result - for joint_params, raw_result in zip(serialized_flattened_joint_params, non_skipped_results) - } - } + {test_case: {params: raw_result for params, raw_result in zip(serialized_params, non_skipped_results)}} ) if len(reduced_results) == 0: