Skip to content

Commit f0f37b1

Browse files
authored
Merge pull request #175 from NeurodataWithoutBorders/lindi_download
2 parents e625cf1 + daa1fc2 commit f0f37b1

15 files changed

+267
-131
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import os
2+
3+
from asv_runner.benchmarks.mark import skip_benchmark
4+
5+
from nwb_benchmarks import TSHARK_PATH
6+
from nwb_benchmarks.core import (
7+
BaseBenchmark,
8+
create_lindi_reference_file_system,
9+
network_activity_tracker,
10+
)
11+
12+
from .params_remote_file_reading import hdf5_params
13+
14+
15+
class LindiCreateJSONFromRemoteFileBenchmark(BaseBenchmark):
16+
"""
17+
Track the network activity during the creation of a LINDI JSON file for a remote NWB HDF5 file using lindi.
18+
"""
19+
20+
params = hdf5_params
21+
22+
def setup(self, params: dict[str, str]):
23+
https_url = params["https_url"]
24+
self.lindi_file = os.path.basename(https_url) + ".nwb.lindi.json"
25+
self.teardown(params)
26+
27+
def teardown(self, params: dict[str, str]):
28+
if os.path.exists(self.lindi_file):
29+
os.remove(self.lindi_file)
30+
31+
# TODO This benchmark takes a long time to index all of the chunks for these files! Do not run until ready
32+
@skip_benchmark
33+
def track_network_create_lindi_json(self, params: dict[str, str]):
34+
"""Read a remote HDF5 file to create a LINDI JSON file."""
35+
https_url = params["https_url"]
36+
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
37+
create_lindi_reference_file_system(https_url=https_url, outfile_path=self.lindi_file)
38+
return network_tracker.asv_network_statistics

src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,14 @@
55
network activity tracker.
66
"""
77

8-
import os
98
import shutil
109

11-
from asv_runner.benchmarks.mark import skip_benchmark, skip_benchmark_if
10+
from asv_runner.benchmarks.mark import skip_benchmark_if
1211

1312
from nwb_benchmarks import TSHARK_PATH
1413
from nwb_benchmarks.core import (
1514
BaseBenchmark,
16-
create_lindi_reference_file_system,
17-
download_file,
15+
download_asset_if_not_exists,
1816
network_activity_tracker,
1917
read_hdf5_h5py_fsspec_https_no_cache,
2018
read_hdf5_h5py_fsspec_https_with_cache,
@@ -39,7 +37,6 @@
3937

4038
from .params_remote_file_reading import (
4139
hdf5_params,
42-
lindi_hdf5_params,
4340
lindi_remote_rfs_params,
4441
zarr_params,
4542
)
@@ -339,38 +336,13 @@ def track_network_read_hdf5_pynwb_remfile_preloaded_with_cache(self, params: dic
339336
return network_tracker.asv_network_statistics
340337

341338

342-
class LindiCreateLocalJSONFileBenchmark(BaseBenchmark):
343-
"""
344-
Track the network activity during read of remote HDF5 files and the creation of a LINDI JSON file using lindi.
345-
"""
346-
347-
params = lindi_hdf5_params
348-
349-
def setup(self, params: dict[str, str]):
350-
https_url = params["https_url"]
351-
self.lindi_file = os.path.basename(https_url) + ".nwb.lindi.json"
352-
self.teardown(params)
353-
354-
def teardown(self, params: dict[str, str]):
355-
if os.path.exists(self.lindi_file):
356-
os.remove(self.lindi_file)
357-
358-
# TODO This benchmark takes a long time to index all of the chunks for these files! Do not run until ready
359-
@skip_benchmark
360-
def track_network_read_create_lindi_json(self, params: dict[str, str]):
361-
"""Read a remote HDF5 file to create a LINDI JSON file."""
362-
https_url = params["https_url"]
363-
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
364-
create_lindi_reference_file_system(https_url=https_url, outfile_path=self.lindi_file)
365-
return network_tracker.asv_network_statistics
366-
367-
368339
class LindiLocalJSONFileReadBenchmark(BaseBenchmark):
369340
"""
370341
Track the network activity during read of remote HDF5 files by reading the local LINDI JSON files with lindi and
371342
h5py or pynwb.
372343
373-
This downloads the already created remote LINDI JSON files during setup.
344+
This downloads the remote LINDI JSON file during setup if it does not already exist in the persistent download
345+
directory.
374346
375347
Note: in all cases, store the in-memory objects to be consistent with timing benchmarks.
376348
"""
@@ -380,17 +352,13 @@ class LindiLocalJSONFileReadBenchmark(BaseBenchmark):
380352
def setup(self, params: dict[str, str]):
381353
"""Download the LINDI JSON file."""
382354
https_url = params["https_url"]
383-
self.lindi_file = os.path.basename(https_url) + ".lindi.json"
384-
self.teardown(params)
385-
download_file(url=https_url, local_path=self.lindi_file)
355+
self.lindi_file = download_asset_if_not_exists(https_url=https_url)
386356

387357
def teardown(self, params: dict[str, str]):
388358
if hasattr(self, "io"):
389359
self.io.close()
390360
if hasattr(self, "client"):
391361
self.client.close()
392-
if os.path.exists(self.lindi_file):
393-
os.remove(self.lindi_file)
394362

395363
@skip_benchmark_if(TSHARK_PATH is None)
396364
def track_network_read_lindi_h5py(self, params: dict[str, str]):

src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
from nwb_benchmarks import TSHARK_PATH
1515
from nwb_benchmarks.core import (
1616
BaseBenchmark,
17+
download_read_hdf5_pynwb_lindi,
1718
get_object_by_name,
1819
network_activity_tracker,
1920
read_hdf5_pynwb_fsspec_https_no_cache,
2021
read_hdf5_pynwb_fsspec_https_with_cache,
2122
read_hdf5_pynwb_fsspec_s3_no_cache,
2223
read_hdf5_pynwb_fsspec_s3_with_cache,
23-
read_hdf5_pynwb_lindi,
2424
read_hdf5_pynwb_remfile_no_cache,
2525
read_hdf5_pynwb_remfile_with_cache,
2626
read_hdf5_pynwb_ros3,
@@ -336,21 +336,41 @@ class LindiLocalJSONContinuousSliceBenchmark(ContinuousSliceBenchmark):
336336
Time the read of a continuous data slice from remote HDF5 NWB files by reading the local LINDI JSON files with
337337
lindi and pynwb.
338338
339-
This downloads the already created remote LINDI JSON files during setup.
339+
This downloads the remote LINDI JSON file during setup if it does not already exist in the persistent download
340+
directory.
341+
"""
342+
343+
params = lindi_remote_rfs_params
344+
345+
def setup(self, params: dict[str, str | Tuple[slice]]):
346+
https_url = params["https_url"]
347+
object_name = params["object_name"]
348+
349+
self.nwbfile, self.io, self.client = download_read_hdf5_pynwb_lindi(https_url=https_url)
350+
self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name)
351+
self.data_to_slice = self.neurodata_object.data
352+
353+
354+
class LindiLocalJSONPreloadedContinuousSliceBenchmark(ContinuousSliceBenchmark):
355+
"""
356+
Time the read of a continuous data slice from remote HDF5 NWB files by reading the local LINDI JSON files with
357+
lindi and pynwb after preloading the data into any caches.
340358
341-
This should be about the same as reading a data slice from an NWB file that is instantiated with a remote LINDI JSON
342-
file because, in that case, the first thing that LINDI does is download the remote file to a temporary directory.
359+
This downloads the remote LINDI JSON file during setup if it does not already exist in the persistent download
360+
directory.
343361
"""
344362

345363
params = lindi_remote_rfs_params
346364

347365
def setup(self, params: dict[str, str | Tuple[slice]]):
348366
https_url = params["https_url"]
349367
object_name = params["object_name"]
368+
slice_range = params["slice_range"]
350369

351-
self.nwbfile, self.io, self.client = read_hdf5_pynwb_lindi(rfs=https_url)
370+
self.nwbfile, self.io, self.client = download_read_hdf5_pynwb_lindi(https_url=https_url)
352371
self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name)
353372
self.data_to_slice = self.neurodata_object.data
373+
self._temp = self.data_to_slice[slice_range]
354374

355375

356376
class ZarrPyNWBS3ContinuousSliceBenchmark(ContinuousSliceBenchmark):

src/nwb_benchmarks/benchmarks/params_remote_file_reading.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,6 @@
3636
# ),
3737
)
3838

39-
# Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on
40-
# the remote server (i.e., we create the LINDI JSON file for these in these tests)
41-
lindi_hdf5_params = hdf5_params
42-
4339
# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not need
4440
# to create the JSON but can load it directly from the remote store
4541
lindi_remote_rfs_params = (
@@ -48,20 +44,23 @@
4844
https_url=get_https_url(
4945
dandiset_id="213889",
5046
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
47+
follow_redirects=False,
5148
),
5249
),
5350
dict(
5451
name="OphysTestCase",
5552
https_url=get_https_url(
5653
dandiset_id="213889",
5754
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
55+
follow_redirects=False,
5856
),
5957
),
6058
dict(
6159
name="IcephysTestCase",
6260
https_url=get_https_url(
6361
dandiset_id="213889",
6462
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
63+
follow_redirects=False,
6564
),
6665
),
6766
# dict(

src/nwb_benchmarks/benchmarks/params_remote_slicing.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
https_url=get_https_url(
155155
dandiset_id="213889",
156156
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
157+
follow_redirects=False,
157158
),
158159
object_name="ElectricalSeries",
159160
slice_range=(slice(0, 262_144), slice(0, 384)), # 12 chunks
@@ -163,6 +164,7 @@
163164
https_url=get_https_url(
164165
dandiset_id="213889",
165166
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
167+
follow_redirects=False,
166168
),
167169
object_name="ElectricalSeries",
168170
slice_range=(slice(0, 262_144 * 2), slice(0, 384)), # 24 chunks
@@ -172,6 +174,7 @@
172174
https_url=get_https_url(
173175
dandiset_id="213889",
174176
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
177+
follow_redirects=False,
175178
),
176179
object_name="ElectricalSeries",
177180
slice_range=(slice(0, 262_144 * 3), slice(0, 384)), # 36 chunks
@@ -181,6 +184,7 @@
181184
https_url=get_https_url(
182185
dandiset_id="213889",
183186
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
187+
follow_redirects=False,
184188
),
185189
object_name="ElectricalSeries",
186190
slice_range=(slice(0, 262_144 * 4), slice(0, 384)), # 48 chunks
@@ -190,6 +194,7 @@
190194
https_url=get_https_url(
191195
dandiset_id="213889",
192196
dandi_path="sub-npI3/sub-npI3_behavior+ecephys.nwb.lindi.json",
197+
follow_redirects=False,
193198
),
194199
object_name="ElectricalSeries",
195200
slice_range=(slice(0, 262_144 * 5), slice(0, 384)), # 60 chunks
@@ -198,7 +203,8 @@
198203
name="OphysTestCase1",
199204
https_url=get_https_url(
200205
dandiset_id="213889",
201-
dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
206+
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
207+
follow_redirects=False,
202208
),
203209
object_name="TwoPhotonSeries",
204210
slice_range=(slice(0, 20), slice(0, 796), slice(0, 512)), # 1 chunk
@@ -207,7 +213,8 @@
207213
name="OphysTestCase2",
208214
https_url=get_https_url(
209215
dandiset_id="213889",
210-
dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
216+
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
217+
follow_redirects=False,
211218
),
212219
object_name="TwoPhotonSeries",
213220
slice_range=(slice(0, 20 * 2), slice(0, 796), slice(0, 512)), # 2 chunks
@@ -216,7 +223,8 @@
216223
name="OphysTestCase3",
217224
https_url=get_https_url(
218225
dandiset_id="213889",
219-
dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
226+
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
227+
follow_redirects=False,
220228
),
221229
object_name="TwoPhotonSeries",
222230
slice_range=(slice(0, 20 * 3), slice(0, 796), slice(0, 512)), # 3 chunks
@@ -225,7 +233,8 @@
225233
name="OphysTestCase4",
226234
https_url=get_https_url(
227235
dandiset_id="213889",
228-
dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
236+
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
237+
follow_redirects=False,
229238
),
230239
object_name="TwoPhotonSeries",
231240
slice_range=(slice(0, 20 * 4), slice(0, 796), slice(0, 512)), # 4 chunks
@@ -234,7 +243,8 @@
234243
name="OphysTestCase5",
235244
https_url=get_https_url(
236245
dandiset_id="213889",
237-
dandi_path="sub-R6_ses-20200206T210000_behavior+ophys/sub-R6_ses-20200206T210000_behavior+ophys.lindi.json",
246+
dandi_path="sub-R6/sub-R6_behavior+ophys.nwb.lindi.json",
247+
follow_redirects=False,
238248
),
239249
object_name="TwoPhotonSeries",
240250
slice_range=(slice(0, 20 * 5), slice(0, 796), slice(0, 512)), # 5 chunks
@@ -244,6 +254,7 @@
244254
https_url=get_https_url(
245255
dandiset_id="213889",
246256
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
257+
follow_redirects=False,
247258
),
248259
object_name="data_00002_AD0",
249260
slice_range=(slice(0, 81_920),), # 10 chunks
@@ -253,6 +264,7 @@
253264
https_url=get_https_url(
254265
dandiset_id="213889",
255266
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
267+
follow_redirects=False,
256268
),
257269
object_name="data_00002_AD0",
258270
slice_range=(slice(0, 81_920 * 2),), # 20 chunks
@@ -262,6 +274,7 @@
262274
https_url=get_https_url(
263275
dandiset_id="213889",
264276
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
277+
follow_redirects=False,
265278
),
266279
object_name="data_00002_AD0",
267280
slice_range=(slice(0, 81_920 * 3),), # 30 chunks
@@ -271,6 +284,7 @@
271284
https_url=get_https_url(
272285
dandiset_id="213889",
273286
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
287+
follow_redirects=False,
274288
),
275289
object_name="data_00002_AD0",
276290
slice_range=(slice(0, 81_920 * 4),), # 40 chunks
@@ -280,6 +294,7 @@
280294
https_url=get_https_url(
281295
dandiset_id="213889",
282296
dandi_path="sub-1214579789_ses-1214621812_icephys/sub-1214579789_ses-1214621812_icephys.lindi.json",
297+
follow_redirects=False,
283298
),
284299
object_name="data_00002_AD0",
285300
slice_range=(slice(0, 81_920 * 5),), # 50 chunks

0 commit comments

Comments
 (0)