diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f053b8b..91dfd490 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ ### Fixes * Fixed incorrect error message for OptogeneticStimulusSite. [#524](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/524) +* Fixed detection of Zarr directories for inspection. [#531](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/531) # v0.5.2 diff --git a/src/nwbinspector/_nwb_inspection.py b/src/nwbinspector/_nwb_inspection.py index 0c7b9566..53f79d10 100644 --- a/src/nwbinspector/_nwb_inspection.py +++ b/src/nwbinspector/_nwb_inspection.py @@ -9,12 +9,13 @@ from warnings import filterwarnings, warn import pynwb +from hdmf_zarr import ZarrIO from natsort import natsorted from tqdm import tqdm from ._configuration import configure_checks from ._registration import Importance, InspectorMessage, available_checks -from .tools._read_nwbfile import read_nwbfile, read_nwbfile_and_io +from .tools._read_nwbfile import read_nwbfile from .utils import ( OptionalListOfStrings, PathType, @@ -126,7 +127,9 @@ def inspect_all( if progress_bar_options is None: progress_bar_options = dict(position=0, leave=False) - if in_path.is_dir(): + if in_path.is_dir() and (in_path.match("*.nwb*")) and ZarrIO.can_read(in_path): + nwbfiles = [in_path] # if it is a zarr directory + elif in_path.is_dir(): nwbfiles = list(in_path.rglob("*.nwb*")) # Remove any macOS sidecar files @@ -271,10 +274,10 @@ def inspect_nwbfile( filterwarnings(action="ignore", message="Ignoring cached namespace .*") try: - in_memory_nwbfile, io = read_nwbfile_and_io(nwbfile_path=nwbfile_path) + in_memory_nwbfile = read_nwbfile(nwbfile_path=nwbfile_path) if not skip_validate: - validation_errors = pynwb.validate(io=io) + validation_errors, _ = pynwb.validate(paths=[nwbfile_path]) for validation_error in validation_errors: yield InspectorMessage( message=validation_error.reason, diff --git a/tests/test_inspector.py b/tests/test_inspector.py index 2d485969..bb65fc4e 100644 --- a/tests/test_inspector.py +++ b/tests/test_inspector.py @@ -268,74 +268,86 @@ def test_inspect_all(self): ] self.assertCountEqual(first=test_results, second=true_results) - def test_inspect_all_parallel(self): - test_results = list( - inspect_all(path=Path(self.nwbfile_paths[0]).parent, select=[x.__name__ for x in self.checks], n_jobs=2) + def test_inspect_all_parallel(self): + test_results = list( + inspect_all( + path=Path(self.nwbfile_paths[0]).parent, + select=[x.__name__ for x in self.checks], + n_jobs=2, + skip_validate=self.skip_validate, ) - true_results = [ - InspectorMessage( - message="data is not compressed. Consider enabling compression when writing a dataset.", - importance=Importance.BEST_PRACTICE_SUGGESTION, - severity=Severity.LOW, - check_function_name="check_small_dataset_compression", - object_type="TimeSeries", - object_name="test_time_series_1", - location="/acquisition/test_time_series_1", - file_path=self.nwbfile_paths[0], - ), - InspectorMessage( - message=( - "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 " - "and rate=0.5 instead of timestamps." - ), - importance=Importance.BEST_PRACTICE_VIOLATION, - severity=Severity.LOW, - check_function_name="check_regular_timestamps", - object_type="TimeSeries", - object_name="test_time_series_2", - location="/acquisition/test_time_series_2", - file_path=self.nwbfile_paths[0], - ), - InspectorMessage( - message=( - "Data may be in the wrong orientation. Time should be in the first dimension, and is usually " - "the longest dimension. Here, another dimension is longer." - ), - importance=Importance.CRITICAL, - severity=Severity.LOW, - check_function_name="check_data_orientation", - object_type="SpatialSeries", - object_name="my_spatial_series", - location="/processing/behavior/Position/my_spatial_series", - file_path=self.nwbfile_paths[0], + ) + true_results = [ + InspectorMessage( + message="data is not compressed. Consider enabling compression when writing a dataset.", + importance=Importance.BEST_PRACTICE_SUGGESTION, + severity=Severity.LOW, + check_function_name="check_small_dataset_compression", + object_type="TimeSeries", + object_name="test_time_series_1", + location="/acquisition/test_time_series_1", + file_path=self.nwbfile_paths[0], + ), + InspectorMessage( + message=( + "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 " + "and rate=0.5 instead of timestamps." ), - InspectorMessage( - message=( - "The length of the first dimension of data (4) does not match the length of timestamps (3)." - ), - importance=Importance.CRITICAL, - severity=Severity.LOW, - check_function_name="check_timestamps_match_first_dimension", - object_type="TimeSeries", - object_name="test_time_series_3", - location="/acquisition/test_time_series_3", - file_path=self.nwbfile_paths[0], + importance=Importance.BEST_PRACTICE_VIOLATION, + severity=Severity.LOW, + check_function_name="check_regular_timestamps", + object_type="TimeSeries", + object_name="test_time_series_2", + location="/acquisition/test_time_series_2", + file_path=self.nwbfile_paths[0], + ), + InspectorMessage( + message=( + "Data may be in the wrong orientation. Time should be in the first dimension, and is usually " + "the longest dimension. Here, another dimension is longer." ), - InspectorMessage( - message=( - "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 " - "and rate=0.5 instead of timestamps." - ), - importance=Importance.BEST_PRACTICE_VIOLATION, - severity=Severity.LOW, - check_function_name="check_regular_timestamps", - object_type="TimeSeries", - object_name="test_time_series_2", - location="/acquisition/test_time_series_2", - file_path=self.nwbfile_paths[1], + importance=Importance.CRITICAL, + severity=Severity.LOW, + check_function_name="check_data_orientation", + object_type="SpatialSeries", + object_name="my_spatial_series", + location="/processing/behavior/Position/my_spatial_series", + file_path=self.nwbfile_paths[0], + ), + InspectorMessage( + message=("The length of the first dimension of data (4) does not match the length of timestamps (3)."), + importance=Importance.CRITICAL, + severity=Severity.LOW, + check_function_name="check_timestamps_match_first_dimension", + object_type="TimeSeries", + object_name="test_time_series_3", + location="/acquisition/test_time_series_3", + file_path=self.nwbfile_paths[0], + ), + InspectorMessage( + message=( + "TimeSeries appears to have a constant sampling rate. Consider specifying starting_time=1.2 " + "and rate=0.5 instead of timestamps." ), - ] - self.assertCountEqual(first=test_results, second=true_results) + importance=Importance.BEST_PRACTICE_VIOLATION, + severity=Severity.LOW, + check_function_name="check_regular_timestamps", + object_type="TimeSeries", + object_name="test_time_series_2", + location="/acquisition/test_time_series_2", + file_path=self.nwbfile_paths[1], + ), + ] + self.assertCountEqual(first=test_results, second=true_results) + + def test_inspect_all_directory(self): + """Test that inspect_all will find the file when given a valid path (in the case of Zarr, this path may be a directory).""" + test_results = list( + inspect_all( + path=self.nwbfile_paths[0], select=[x.__name__ for x in self.checks], skip_validate=self.skip_validate + ) + ) + self.assertGreater(len(test_results), 0) def test_inspect_nwbfile(self): test_results = list(