From 8475f38abb4f0ff4bf442e8e5243322ed31b6d00 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 9 Oct 2025 09:16:35 -0400 Subject: [PATCH 1/5] Add check for duration of time columns in DynamicTable --- src/nwbinspector/checks/__init__.py | 2 + src/nwbinspector/checks/_tables.py | 87 +++++++++++++++++++++++++++ tests/unit_tests/test_tables.py | 93 +++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+) diff --git a/src/nwbinspector/checks/__init__.py b/src/nwbinspector/checks/__init__.py index cf231978..2380666c 100644 --- a/src/nwbinspector/checks/__init__.py +++ b/src/nwbinspector/checks/__init__.py @@ -72,6 +72,7 @@ check_ids_unique, check_single_row, check_table_time_columns_are_not_negative, + check_table_time_columns_duration, check_table_values_for_dict, check_time_interval_time_columns, check_time_intervals_stop_after_start, @@ -140,6 +141,7 @@ "check_time_intervals_stop_after_start", "check_table_values_for_dict", "check_table_time_columns_are_not_negative", + "check_table_time_columns_duration", "check_resolution", "check_missing_unit", "check_regular_timestamps", diff --git a/src/nwbinspector/checks/_tables.py b/src/nwbinspector/checks/_tables.py index ef99d02e..3cca0e99 100644 --- a/src/nwbinspector/checks/_tables.py +++ b/src/nwbinspector/checks/_tables.py @@ -292,3 +292,90 @@ def check_table_time_columns_are_not_negative(table: DynamicTable) -> Optional[I ) return None + + +@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable) +def check_table_time_columns_duration( + table: DynamicTable, duration_threshold: float = 31557600.0 +) -> Optional[InspectorMessage]: + """ + Check if the duration spanned by time columns in a DynamicTable exceeds a threshold. + + This check examines time-related columns (start_time, stop_time, timestamp, spike_times) + and calculates the duration as max(time) - min(time). If this exceeds the threshold + (default: 1 year = 31,557,600 seconds), a warning is issued. 
+ + Parameters + ---------- + table: DynamicTable + The table to check + duration_threshold: float, optional + Maximum expected duration in seconds. Default is 1 year (365.25 days). + + Returns + ------- + Optional[InspectorMessage] + Warning message if duration exceeds threshold, None otherwise + """ + if len(table.id) == 0: + return None # Empty table + + start_times = [] + end_times = [] + + # Check for start_time and stop_time columns (e.g., trials) + if "start_time" in table.colnames and len(table["start_time"]) > 0: + start_times.append(float(table["start_time"][0])) + if "stop_time" in table.colnames and len(table["stop_time"]) > 0: + end_times.append(float(table["stop_time"][-1])) + + # Check for timestamp column (possibly with duration) + if "timestamp" in table.colnames and len(table["timestamp"]) > 0: + timestamp_data = table["timestamp"] + start_times.append(float(timestamp_data[0])) + + if "duration" in table.colnames and len(table["duration"]) > 0: + duration_data = table["duration"] + end_times.append(float(timestamp_data[-1] + duration_data[-1])) + else: + end_times.append(float(timestamp_data[-1])) + + # Check for spike_times column (Units table) + # Assume spike times are ordered within each unit + if "spike_times" in table.colnames and len(table["spike_times"]) > 0: + idxs = table["spike_times"].data[:] + + # Remove zeros from idxs (units with no spikes) + idxs = idxs[idxs != 0] + + if len(idxs) > 0: + st_data = table["spike_times"].target + + if len(idxs) > 1: + start = float(np.min(np.r_[st_data[0], st_data[idxs[:-1]]])) + else: + start = float(st_data[0]) + + end = float(np.max(st_data[idxs - 1])) + start_times.append(start) + end_times.append(end) + + # Calculate duration if we found any time data + if start_times and end_times: + duration = max(end_times) - min(start_times) + + # Check if duration exceeds threshold + if duration > duration_threshold: + # Convert to years for the message + duration_years = duration / 31557600.0 + 
threshold_years = duration_threshold / 31557600.0 + return InspectorMessage( + message=( + f"DynamicTable '{table.name}' has a duration of {duration:.2f} seconds " + f"({duration_years:.2f} years), which exceeds the threshold of " + f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). " + "Please verify that this is correct." + ) + ) + + return None diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index 3e0b8027..aa49a22c 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -16,6 +16,7 @@ check_ids_unique, check_single_row, check_table_time_columns_are_not_negative, + check_table_time_columns_duration, check_table_values_for_dict, check_time_interval_time_columns, check_time_intervals_stop_after_start, @@ -498,3 +499,95 @@ def test_table_time_columns_are_not_negative_multidimensional_pass(): test_table.add_row(test_time=[0.0, 1.0, 2.0, 3.0]) assert check_table_time_columns_are_not_negative(test_table) is None + + +def test_check_table_time_columns_duration_pass_short(): + """Test that short duration tables pass the check.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=10.0) + table.add_row(start_time=15.0, stop_time=25.0) + table.add_row(start_time=30.0, stop_time=100.0) + + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_fail_exceeds_threshold(): + """Test that tables with duration exceeding 1 year fail.""" + one_year = 31557600.0 + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000) + + result = check_table_time_columns_duration(table) + assert result is not None + assert "trials" in result.message + assert "exceeds the threshold" in result.message + assert result.importance == Importance.BEST_PRACTICE_SUGGESTION + + +def 
test_check_table_time_columns_duration_fail_exceeds_five_years(): + """Test that tables with duration exceeding 5 years get a more serious warning.""" + six_years = 31557600.0 * 6 + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=six_years, stop_time=six_years + 100) + + result = check_table_time_columns_duration(table) + assert result is not None + assert "exceeds 5 years" in result.message + assert "error" in result.message.lower() + + +def test_check_table_time_columns_duration_pass_empty(): + """Test that empty tables pass.""" + table = TimeIntervals(name="trials", description="test trials") + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_pass_custom_threshold(): + """Test that custom threshold works correctly.""" + table = TimeIntervals(name="trials", description="test trials") + table.add_row(start_time=0.0, stop_time=100.0) + table.add_row(start_time=150.0, stop_time=200.0) + + # Should fail with 100 second threshold + result = check_table_time_columns_duration(table, duration_threshold=100.0) + assert result is not None + + # Should pass with 300 second threshold + result = check_table_time_columns_duration(table, duration_threshold=300.0) + assert result is None + + +def test_check_table_time_columns_duration_with_timestamp(): + """Test with timestamp column.""" + table = DynamicTable(name="events", description="test events") + table.add_column(name="timestamp", description="event timestamps") + table.add_row(timestamp=0.0) + table.add_row(timestamp=100.0) + + assert check_table_time_columns_duration(table) is None + + +def test_check_table_time_columns_duration_with_timestamp_and_duration(): + """Test with timestamp and duration columns.""" + one_year = 31557600.0 + table = DynamicTable(name="events", description="test events") + table.add_column(name="timestamp", description="event timestamps") + 
table.add_column(name="duration", description="event durations") + table.add_row(timestamp=0.0, duration=10.0) + table.add_row(timestamp=one_year, duration=1000.0) + + result = check_table_time_columns_duration(table) + assert result is not None + assert "exceeds the threshold" in result.message + + +def test_check_table_time_columns_duration_no_time_columns(): + """Test that tables without time columns pass.""" + table = DynamicTable(name="test_table", description="test") + table.add_column(name="value", description="some data") + table.add_row(value=123) + table.add_row(value=456) + + assert check_table_time_columns_duration(table) is None From ccef30b6eccf24fbed9893df36a3b5b88701f751 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:17:17 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/nwbinspector/checks/_tables.py | 12 ++++++------ tests/unit_tests/test_tables.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/nwbinspector/checks/_tables.py b/src/nwbinspector/checks/_tables.py index 3cca0e99..a77ba144 100644 --- a/src/nwbinspector/checks/_tables.py +++ b/src/nwbinspector/checks/_tables.py @@ -333,7 +333,7 @@ def check_table_time_columns_duration( if "timestamp" in table.colnames and len(table["timestamp"]) > 0: timestamp_data = table["timestamp"] start_times.append(float(timestamp_data[0])) - + if "duration" in table.colnames and len(table["duration"]) > 0: duration_data = table["duration"] end_times.append(float(timestamp_data[-1] + duration_data[-1])) @@ -344,18 +344,18 @@ def check_table_time_columns_duration( # Assume spike times are ordered within each unit if "spike_times" in table.colnames and len(table["spike_times"]) > 0: idxs = table["spike_times"].data[:] - + # Remove zeros from idxs (units with no spikes) idxs = idxs[idxs != 0] - + if 
len(idxs) > 0: st_data = table["spike_times"].target - + if len(idxs) > 1: start = float(np.min(np.r_[st_data[0], st_data[idxs[:-1]]])) else: start = float(st_data[0]) - + end = float(np.max(st_data[idxs - 1])) start_times.append(start) end_times.append(end) @@ -363,7 +363,7 @@ def check_table_time_columns_duration( # Calculate duration if we found any time data if start_times and end_times: duration = max(end_times) - min(start_times) - + # Check if duration exceeds threshold if duration > duration_threshold: # Convert to years for the message diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index aa49a22c..a57b91da 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -507,7 +507,7 @@ def test_check_table_time_columns_duration_pass_short(): table.add_row(start_time=0.0, stop_time=10.0) table.add_row(start_time=15.0, stop_time=25.0) table.add_row(start_time=30.0, stop_time=100.0) - + assert check_table_time_columns_duration(table) is None @@ -517,7 +517,7 @@ def test_check_table_time_columns_duration_fail_exceeds_threshold(): table = TimeIntervals(name="trials", description="test trials") table.add_row(start_time=0.0, stop_time=100.0) table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000) - + result = check_table_time_columns_duration(table) assert result is not None assert "trials" in result.message @@ -531,7 +531,7 @@ def test_check_table_time_columns_duration_fail_exceeds_five_years(): table = TimeIntervals(name="trials", description="test trials") table.add_row(start_time=0.0, stop_time=100.0) table.add_row(start_time=six_years, stop_time=six_years + 100) - + result = check_table_time_columns_duration(table) assert result is not None assert "exceeds 5 years" in result.message @@ -549,11 +549,11 @@ def test_check_table_time_columns_duration_pass_custom_threshold(): table = TimeIntervals(name="trials", description="test trials") table.add_row(start_time=0.0, stop_time=100.0) 
table.add_row(start_time=150.0, stop_time=200.0) - + # Should fail with 100 second threshold result = check_table_time_columns_duration(table, duration_threshold=100.0) assert result is not None - + # Should pass with 300 second threshold result = check_table_time_columns_duration(table, duration_threshold=300.0) assert result is None @@ -565,7 +565,7 @@ def test_check_table_time_columns_duration_with_timestamp(): table.add_column(name="timestamp", description="event timestamps") table.add_row(timestamp=0.0) table.add_row(timestamp=100.0) - + assert check_table_time_columns_duration(table) is None @@ -577,7 +577,7 @@ def test_check_table_time_columns_duration_with_timestamp_and_duration(): table.add_column(name="duration", description="event durations") table.add_row(timestamp=0.0, duration=10.0) table.add_row(timestamp=one_year, duration=1000.0) - + result = check_table_time_columns_duration(table) assert result is not None assert "exceeds the threshold" in result.message @@ -589,5 +589,5 @@ def test_check_table_time_columns_duration_no_time_columns(): table.add_column(name="value", description="some data") table.add_row(value=123) table.add_row(value=456) - + assert check_table_time_columns_duration(table) is None From ff0b4c918f2897edd5b0428d7ed34d5c21de55ae Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 9 Oct 2025 11:31:56 -0400 Subject: [PATCH 3/5] Remove test for duration exceeding five years from time columns checks --- tests/unit_tests/test_tables.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/unit_tests/test_tables.py b/tests/unit_tests/test_tables.py index aa49a22c..5e29c18d 100644 --- a/tests/unit_tests/test_tables.py +++ b/tests/unit_tests/test_tables.py @@ -525,19 +525,6 @@ def test_check_table_time_columns_duration_fail_exceeds_threshold(): assert result.importance == Importance.BEST_PRACTICE_SUGGESTION -def test_check_table_time_columns_duration_fail_exceeds_five_years(): - """Test that tables with duration exceeding 
5 years get a more serious warning.""" - six_years = 31557600.0 * 6 - table = TimeIntervals(name="trials", description="test trials") - table.add_row(start_time=0.0, stop_time=100.0) - table.add_row(start_time=six_years, stop_time=six_years + 100) - - result = check_table_time_columns_duration(table) - assert result is not None - assert "exceeds 5 years" in result.message - assert "error" in result.message.lower() - - def test_check_table_time_columns_duration_pass_empty(): """Test that empty tables pass.""" table = TimeIntervals(name="trials", description="test trials") From 8d91ae1155f79379bd644bc9db45fe2a350d8cd0 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 10 Oct 2025 13:46:47 -0400 Subject: [PATCH 4/5] updated changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 101c33eb..b9878ee3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,10 @@ # v0.6.6 (Upcoming) +### New Checks +* Added checks for the duration of DynamicTables by checking start_time, stop_time, timestamp, duration, and spike_times columns. [#623](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/623) + ### Improvements -* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) +* Added documentation to API and CLI docs on how to use the dandi config option. 
[#628](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/628) # v0.6.5 (July 25, 2025) From a011d44e392eb22a2fc727f42c6f0db674684671 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 13 Nov 2025 14:32:56 -0500 Subject: [PATCH 5/5] Update CHANGELOG.md Co-authored-by: Steph Prince <40640337+stephprince@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bc7968c..cb8d7b7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ * Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625) ### Improvements -* Added documentation to API and CLI docs on how to use the dandi config option. [#628](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/628) +* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624) # v0.6.5 (July 25, 2025)