diff --git a/CHANGELOG.md b/CHANGELOG.md index 880ab649..5b3362b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### New Checks * Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625) +* Added `check_time_intervals_duration`, which makes sure that `TimeInterval` objects do not have a duration greater than 1 year. +[#635](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/635) ### Improvements * Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) diff --git a/docs/best_practices/tables.rst b/docs/best_practices/tables.rst index 678d86cf..49a26e3e 100644 --- a/docs/best_practices/tables.rst +++ b/docs/best_practices/tables.rst @@ -120,6 +120,8 @@ Times are always stored in seconds in NWB. In :ref:`nwb-schema:sec-TimeIntervals Additional time columns in :ref:`nwb-schema:sec-TimeIntervals` tables, such as the ``TrialsTable`` should have ``_time`` as a suffix to the name. *E.g.*, if you add more times in ``TrialsTable``, such as a subject response time, name it ``response_time`` and store the time values in seconds from the ``timestamps_reference_time`` of the :ref:`nwb-schema:sec-NWBFile`, just like ``start_time`` and ``stop_time``. This convention is used by downstream processing tools. For instance, NWBWidgets uses these times to create peri-stimulus time histograms relating spiking activity to trial events. See :ref:`best_practice_global_time_reference` for more details. +Check function :py:meth:`~nwbinspector.checks._tables.check_time_intervals_duration` + .. _best_practice_unique_dynamic_table_ids: Unique ids diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index 3011d187..096247aa 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -75,6 +75,7 @@ check_table_time_columns_are_not_negative, check_table_values_for_dict, check_time_interval_time_columns, + check_time_intervals_duration, check_time_intervals_stop_after_start, ) from ._time_series import ( diff --git a/src/nwbinspector/checks/_tables.py b/src/nwbinspector/checks/_tables.py index ef99d02e..a652147c 100644 --- a/src/nwbinspector/checks/_tables.py +++ b/src/nwbinspector/checks/_tables.py @@ -18,6 +18,7 @@ ) NELEMS = 200 +MAX_DURATION = 3600 * 24 * 365.25 # default: 1 year @register_check(importance=Importance.CRITICAL, neurodata_type=DynamicTableRegion) @@ -292,3 +293,61 @@ def check_table_time_columns_are_not_negative(table: DynamicTable) -> Optional[I ) return None + + +@register_check(importance=Importance.CRITICAL, neurodata_type=TimeIntervals) +def check_time_intervals_duration( + time_intervals: TimeIntervals, duration_threshold: float = MAX_DURATION +) -> Optional[InspectorMessage]: + """ + Check if the duration spanned by time columns in a TimeIntervals table exceeds a threshold. + + This check examines start_time, stop_time, and any other columns ending in _time. + + Best Practice: :ref:`best_practice_time_interval_time_columns` + + Parameters + ---------- + time_intervals: TimeIntervals + The table to check + duration_threshold: float, optional + Maximum expected duration in seconds. Default is 1 year (365.25 days). + """ + if len(time_intervals.id) == 0: + return None + + start_times = [] + end_times = [] + + # Check for start_time and stop_time columns + if "start_time" in time_intervals.colnames and len(time_intervals["start_time"]) > 0: + start_times.append(float(time_intervals["start_time"][0])) + if "stop_time" in time_intervals.colnames and len(time_intervals["stop_time"]) > 0: + end_times.append(float(time_intervals["stop_time"][-1])) + + # Check for other time columns + for column_name in time_intervals.colnames: + if ( + column_name.endswith("_time") + and column_name not in ["start_time", "stop_time"] + and len(time_intervals[column_name]) > 0 + ): + data = time_intervals[column_name] + start_times.append(float(data[0])) + end_times.append(float(data[-1])) + + if start_times and end_times: + duration = max(end_times) - min(start_times) + + if duration > duration_threshold: + duration_years = duration / 31557600.0 + threshold_years = duration_threshold / 31557600.0 + return InspectorMessage( + message=( + f"TimeIntervals table '{time_intervals.name}' has a duration of {duration:.2f} seconds " + f"({duration_years:.2f} years), which exceeds the threshold of " + f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). " + "Please verify that this is correct." + ) + ) + return None diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index 3e0b8027..8a7929e9 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -18,6 +18,7 @@ check_table_time_columns_are_not_negative, check_table_values_for_dict, check_time_interval_time_columns, + check_time_intervals_duration, check_time_intervals_stop_after_start, ) @@ -498,3 +499,72 @@ def test_table_time_columns_are_not_negative_multidimensional_pass(): test_table.add_row(test_time=[0.0, 1.0, 2.0, 3.0]) assert check_table_time_columns_are_not_negative(test_table) is None + + +def test_check_time_intervals_duration_pass_short(): + """Test that short duration tables pass the check.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=10.0) + table.add_row(start_time=15.0, stop_time=25.0) + table.add_row(start_time=30.0, stop_time=100.0) + + assert check_time_intervals_duration(table) is None + + +def test_check_time_intervals_duration_fail_exceeds_threshold(): + """Test that tables with duration exceeding 1 year fail.""" + one_year = 31557600.0 + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000) + + result = check_time_intervals_duration(table) + assert result is not None + assert "trials" in result.message + assert "exceeds the threshold" in result.message + assert result.importance == Importance.CRITICAL + + +def test_check_time_intervals_duration_pass_empty(): + """Test that empty tables pass.""" + table = TimeIntervals(name="trials", description="test trials") + assert check_time_intervals_duration(table) is None + + +def test_check_time_intervals_duration_pass_custom_threshold(): + """Test that custom threshold works correctly.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=150.0, stop_time=200.0) + + # Should fail with 100 second threshold + result = check_time_intervals_duration(table, duration_threshold=100.0) + assert result is not None + + # Should pass with 300 second threshold + result = check_time_intervals_duration(table, duration_threshold=300.0) + assert result is None + + +def test_check_time_intervals_duration_with_additional_time_columns(): + """Test that the check considers additional time columns ending in '_time'.""" + one_year = 31557600.0 + table = TimeIntervals(name="trials", description="test trials") + table.add_column(name="custom_time", description="custom time column") + table.add_row(start_time=0.0, stop_time=100.0, custom_time=0.0) + table.add_row(start_time=150.0, stop_time=200.0, custom_time=one_year + 1000) + + result = check_time_intervals_duration(table) + assert result is not None + assert "trials" in result.message + assert "exceeds the threshold" in result.message + + +def test_check_time_intervals_duration_pass_without_start_stop(): + """Test that tables with only other time columns work correctly.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_column(name="custom_time", description="custom time column") + table.add_row(start_time=0.0, stop_time=10.0, custom_time=5.0) + table.add_row(start_time=15.0, stop_time=25.0, custom_time=20.0) + + assert check_time_intervals_duration(table) is None