Skip to content

Commit 3aa3dab

Browse files
stephprince and rly authored
Use cached namespaces during validation (#531)
* update pynwb validation to use cached namespaces * add zarr check when searching for nwbfiles * add dir test, fix parallel test indent * update zarr dir detection * update CHANGELOG.md --------- Co-authored-by: Ryan Ly <[email protected]>
1 parent c26924b commit 3aa3dab

File tree

3 files changed

+84
-68
lines changed

3 files changed

+84
-68
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
### Fixes
2020
* Fixed incorrect error message for OptogeneticStimulusSite. [#524](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/524)
21+
* Fixed detection of Zarr directories for inspection. [#531](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/531)
2122

2223

2324
# v0.5.2

src/nwbinspector/_nwb_inspection.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99
from warnings import filterwarnings, warn
1010

1111
import pynwb
12+
from hdmf_zarr import ZarrIO
1213
from natsort import natsorted
1314
from tqdm import tqdm
1415

1516
from ._configuration import configure_checks
1617
from ._registration import Importance, InspectorMessage, available_checks
17-
from .tools._read_nwbfile import read_nwbfile, read_nwbfile_and_io
18+
from .tools._read_nwbfile import read_nwbfile
1819
from .utils import (
1920
OptionalListOfStrings,
2021
PathType,
@@ -126,7 +127,9 @@ def inspect_all(
126127
if progress_bar_options is None:
127128
progress_bar_options = dict(position=0, leave=False)
128129

129-
if in_path.is_dir():
130+
if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path):
131+
nwbfiles = [in_path] # if it is a zarr directory
132+
elif in_path.is_dir():
130133
nwbfiles = list(in_path.rglob("*.nwb*"))
131134

132135
# Remove any macOS sidecar files
@@ -271,10 +274,10 @@ def inspect_nwbfile(
271274
filterwarnings(action="ignore", message="Ignoring cached namespace .*")
272275

273276
try:
274-
in_memory_nwbfile, io = read_nwbfile_and_io(nwbfile_path=nwbfile_path)
277+
in_memory_nwbfile = read_nwbfile(nwbfile_path=nwbfile_path)
275278

276279
if not skip_validate:
277-
validation_errors = pynwb.validate(io=io)
280+
validation_errors, _ = pynwb.validate(paths=[nwbfile_path])
278281
for validation_error in validation_errors:
279282
yield InspectorMessage(
280283
message=validation_error.reason,

tests/test_inspector.py

Lines changed: 76 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -268,74 +268,86 @@ def test_inspect_all(self):
268268
]
269269
self.assertCountEqual(first=test_results, second=true_results)
270270

271-
def test_inspect_all_parallel(self):
272-
test_results = list(
273-
inspect_all(path=Path(self.nwbfile_paths[0]).parent, select=[x.__name__ for x in self.checks], n_jobs=2)
271+
def test_inspect_all_parallel(self):
272+
test_results = list(
273+
inspect_all(
274+
path=Path(self.nwbfile_paths[0]).parent,
275+
select=[x.__name__ for x in self.checks],
276+
n_jobs=2,
277+
skip_validate=self.skip_validate,
274278
)
275-
true_results = [
276-
InspectorMessage(
277-
message="data is not compressed. Consider enabling compression when writing a dataset.",
278-
importance=Importance.BEST_PRACTICE_SUGGESTION,
279-
severity=Severity.LOW,
280-
check_function_name="check_small_dataset_compression",
281-
object_type="TimeSeries",
282-
object_name="test_time_series_1",
283-
location="/acquisition/test_time_series_1",
284-
file_path=self.nwbfile_paths[0],
285-
),
286-
InspectorMessage(
287-
message=(
288-
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
289-
"and rate=0.5 instead of timestamps."
290-
),
291-
importance=Importance.BEST_PRACTICE_VIOLATION,
292-
severity=Severity.LOW,
293-
check_function_name="check_regular_timestamps",
294-
object_type="TimeSeries",
295-
object_name="test_time_series_2",
296-
location="/acquisition/test_time_series_2",
297-
file_path=self.nwbfile_paths[0],
298-
),
299-
InspectorMessage(
300-
message=(
301-
"Data may be in the wrong orientation. Time should be in the first dimension, and is usually "
302-
"the longest dimension. Here, another dimension is longer."
303-
),
304-
importance=Importance.CRITICAL,
305-
severity=Severity.LOW,
306-
check_function_name="check_data_orientation",
307-
object_type="SpatialSeries",
308-
object_name="my_spatial_series",
309-
location="/processing/behavior/Position/my_spatial_series",
310-
file_path=self.nwbfile_paths[0],
279+
)
280+
true_results = [
281+
InspectorMessage(
282+
message="data is not compressed. Consider enabling compression when writing a dataset.",
283+
importance=Importance.BEST_PRACTICE_SUGGESTION,
284+
severity=Severity.LOW,
285+
check_function_name="check_small_dataset_compression",
286+
object_type="TimeSeries",
287+
object_name="test_time_series_1",
288+
location="/acquisition/test_time_series_1",
289+
file_path=self.nwbfile_paths[0],
290+
),
291+
InspectorMessage(
292+
message=(
293+
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
294+
"and rate=0.5 instead of timestamps."
311295
),
312-
InspectorMessage(
313-
message=(
314-
"The length of the first dimension of data (4) does not match the length of timestamps (3)."
315-
),
316-
importance=Importance.CRITICAL,
317-
severity=Severity.LOW,
318-
check_function_name="check_timestamps_match_first_dimension",
319-
object_type="TimeSeries",
320-
object_name="test_time_series_3",
321-
location="/acquisition/test_time_series_3",
322-
file_path=self.nwbfile_paths[0],
296+
importance=Importance.BEST_PRACTICE_VIOLATION,
297+
severity=Severity.LOW,
298+
check_function_name="check_regular_timestamps",
299+
object_type="TimeSeries",
300+
object_name="test_time_series_2",
301+
location="/acquisition/test_time_series_2",
302+
file_path=self.nwbfile_paths[0],
303+
),
304+
InspectorMessage(
305+
message=(
306+
"Data may be in the wrong orientation. Time should be in the first dimension, and is usually "
307+
"the longest dimension. Here, another dimension is longer."
323308
),
324-
InspectorMessage(
325-
message=(
326-
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
327-
"and rate=0.5 instead of timestamps."
328-
),
329-
importance=Importance.BEST_PRACTICE_VIOLATION,
330-
severity=Severity.LOW,
331-
check_function_name="check_regular_timestamps",
332-
object_type="TimeSeries",
333-
object_name="test_time_series_2",
334-
location="/acquisition/test_time_series_2",
335-
file_path=self.nwbfile_paths[1],
309+
importance=Importance.CRITICAL,
310+
severity=Severity.LOW,
311+
check_function_name="check_data_orientation",
312+
object_type="SpatialSeries",
313+
object_name="my_spatial_series",
314+
location="/processing/behavior/Position/my_spatial_series",
315+
file_path=self.nwbfile_paths[0],
316+
),
317+
InspectorMessage(
318+
message=("The length of the first dimension of data (4) does not match the length of timestamps (3)."),
319+
importance=Importance.CRITICAL,
320+
severity=Severity.LOW,
321+
check_function_name="check_timestamps_match_first_dimension",
322+
object_type="TimeSeries",
323+
object_name="test_time_series_3",
324+
location="/acquisition/test_time_series_3",
325+
file_path=self.nwbfile_paths[0],
326+
),
327+
InspectorMessage(
328+
message=(
329+
"TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 "
330+
"and rate=0.5 instead of timestamps."
336331
),
337-
]
338-
self.assertCountEqual(first=test_results, second=true_results)
332+
importance=Importance.BEST_PRACTICE_VIOLATION,
333+
severity=Severity.LOW,
334+
check_function_name="check_regular_timestamps",
335+
object_type="TimeSeries",
336+
object_name="test_time_series_2",
337+
location="/acquisition/test_time_series_2",
338+
file_path=self.nwbfile_paths[1],
339+
),
340+
]
341+
self.assertCountEqual(first=test_results, second=true_results)
342+
343+
def test_inspect_all_directory(self):
344+
"""Test that inspect_all will find the file when given a valid path (in the case of Zarr, this path may be a directory)."""
345+
test_results = list(
346+
inspect_all(
347+
path=self.nwbfile_paths[0], select=[x.__name__ for x in self.checks], skip_validate=self.skip_validate
348+
)
349+
)
350+
self.assertGreater(len(test_results), 0)
339351

340352
def test_inspect_nwbfile(self):
341353
test_results = list(

0 commit comments

Comments (0)