Skip to content
Open
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# v0.6.6 (Upcoming)

### New Checks
* Added checks for the duration of DynamicTables by checking start_time, stop_time, timestamp, duration, and spike_times columns. [#628](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/628)
* Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625)

### Improvements
Expand Down
2 changes: 2 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
check_ids_unique,
check_single_row,
check_table_time_columns_are_not_negative,
check_table_time_columns_duration,
check_table_values_for_dict,
check_time_interval_time_columns,
check_time_intervals_stop_after_start,
Expand Down Expand Up @@ -142,6 +143,7 @@
"check_time_intervals_stop_after_start",
"check_table_values_for_dict",
"check_table_time_columns_are_not_negative",
"check_table_time_columns_duration",
"check_resolution",
"check_missing_unit",
"check_regular_timestamps",
Expand Down
87 changes: 87 additions & 0 deletions src/nwbinspector/checks/_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,90 @@ def check_table_time_columns_are_not_negative(table: DynamicTable) -> Optional[I
)

return None


@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable)
def check_table_time_columns_duration(
table: DynamicTable, duration_threshold: float = 31557600.0
) -> Optional[InspectorMessage]:
"""
Check if the duration spanned by time columns in a DynamicTable exceeds a threshold.

This check examines time-related columns (start_time, stop_time, timestamp, spike_times)
and calculates the duration as max(time) - min(time). If this exceeds the threshold
(default: 1 year = 31,557,600 seconds), a warning is issued.

Parameters
----------
table: DynamicTable
The table to check
duration_threshold: float, optional
Maximum expected duration in seconds. Default is 1 year (365.25 days).

Returns
-------
Optional[InspectorMessage]
Warning message if duration exceeds threshold, None otherwise
"""
if len(table.id) == 0:
return None # Empty table

start_times = []
end_times = []

# Check for start_time and stop_time columns (e.g., trials)
if "start_time" in table.colnames and len(table["start_time"]) > 0:
start_times.append(float(table["start_time"][0]))
if "stop_time" in table.colnames and len(table["stop_time"]) > 0:
end_times.append(float(table["stop_time"][-1]))
Comment on lines +328 to +330
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also check max/min of all start and stop times if we don't want to assume that these columns are also following ascending order best practices


# Check for timestamp column (possibly with duration)
if "timestamp" in table.colnames and len(table["timestamp"]) > 0:
timestamp_data = table["timestamp"]
start_times.append(float(timestamp_data[0]))

if "duration" in table.colnames and len(table["duration"]) > 0:
duration_data = table["duration"]
end_times.append(float(timestamp_data[-1] + duration_data[-1]))
else:
end_times.append(float(timestamp_data[-1]))
Comment on lines +332 to +341
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we recommend storing data this way somewhere? This seems relatively arbitrary to look for (vs. the start_time/end_time and spike_times columns we know exist in DynamicTables from the schema).

Suggested change
# Check for timestamp column (possibly with duration)
if "timestamp" in table.colnames and len(table["timestamp"]) > 0:
timestamp_data = table["timestamp"]
start_times.append(float(timestamp_data[0]))
if "duration" in table.colnames and len(table["duration"]) > 0:
duration_data = table["duration"]
end_times.append(float(timestamp_data[-1] + duration_data[-1]))
else:
end_times.append(float(timestamp_data[-1]))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some other inspector checks that search for column names ending with _time. I think those could be checked for duration

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for ndx-events. I should have mentioned that somewhere, huh?


# Check for spike_times column (Units table)
# Assume spike times are ordered within each unit
if "spike_times" in table.colnames and len(table["spike_times"]) > 0:
idxs = table["spike_times"].data[:]

# Remove zeros from idxs (units with no spikes)
idxs = idxs[idxs != 0]

if len(idxs) > 0:
st_data = table["spike_times"].target

if len(idxs) > 1:
start = float(np.min(np.r_[st_data[0], st_data[idxs[:-1]]]))
else:
start = float(st_data[0])

end = float(np.max(st_data[idxs - 1]))
start_times.append(start)
end_times.append(end)

# Calculate duration if we found any time data
if start_times and end_times:
duration = max(end_times) - min(start_times)

# Check if duration exceeds threshold
if duration > duration_threshold:
# Convert to years for the message
duration_years = duration / 31557600.0
threshold_years = duration_threshold / 31557600.0
return InspectorMessage(
message=(
f"DynamicTable '{table.name}' has a duration of {duration:.2f} seconds "
f"({duration_years:.2f} years), which exceeds the threshold of "
f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). "
"Please verify that this is correct."
)
)

return None
80 changes: 80 additions & 0 deletions tests/unit_tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
check_ids_unique,
check_single_row,
check_table_time_columns_are_not_negative,
check_table_time_columns_duration,
check_table_values_for_dict,
check_time_interval_time_columns,
check_time_intervals_stop_after_start,
Expand Down Expand Up @@ -498,3 +499,82 @@ def test_table_time_columns_are_not_negative_multidimensional_pass():
test_table.add_row(test_time=[0.0, 1.0, 2.0, 3.0])

assert check_table_time_columns_are_not_negative(test_table) is None


def test_check_table_time_columns_duration_pass_short():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add tests for the Units table / spike_times case?

"""Test that short duration tables pass the check."""
table = TimeIntervals(name="trials", description="test trials")
table.add_row(start_time=0.0, stop_time=10.0)
table.add_row(start_time=15.0, stop_time=25.0)
table.add_row(start_time=30.0, stop_time=100.0)

assert check_table_time_columns_duration(table) is None


def test_check_table_time_columns_duration_fail_exceeds_threshold():
"""Test that tables with duration exceeding 1 year fail."""
one_year = 31557600.0
table = TimeIntervals(name="trials", description="test trials")
table.add_row(start_time=0.0, stop_time=100.0)
table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000)

result = check_table_time_columns_duration(table)
assert result is not None
assert "trials" in result.message
assert "exceeds the threshold" in result.message
assert result.importance == Importance.BEST_PRACTICE_SUGGESTION


def test_check_table_time_columns_duration_pass_empty():
"""Test that empty tables pass."""
table = TimeIntervals(name="trials", description="test trials")
assert check_table_time_columns_duration(table) is None


def test_check_table_time_columns_duration_pass_custom_threshold():
"""Test that custom threshold works correctly."""
table = TimeIntervals(name="trials", description="test trials")
table.add_row(start_time=0.0, stop_time=100.0)
table.add_row(start_time=150.0, stop_time=200.0)

# Should fail with 100 second threshold
result = check_table_time_columns_duration(table, duration_threshold=100.0)
assert result is not None

# Should pass with 300 second threshold
result = check_table_time_columns_duration(table, duration_threshold=300.0)
assert result is None


def test_check_table_time_columns_duration_with_timestamp():
"""Test with timestamp column."""
table = DynamicTable(name="events", description="test events")
table.add_column(name="timestamp", description="event timestamps")
table.add_row(timestamp=0.0)
table.add_row(timestamp=100.0)

assert check_table_time_columns_duration(table) is None


def test_check_table_time_columns_duration_with_timestamp_and_duration():
"""Test with timestamp and duration columns."""
one_year = 31557600.0
table = DynamicTable(name="events", description="test events")
table.add_column(name="timestamp", description="event timestamps")
table.add_column(name="duration", description="event durations")
table.add_row(timestamp=0.0, duration=10.0)
table.add_row(timestamp=one_year, duration=1000.0)

result = check_table_time_columns_duration(table)
assert result is not None
assert "exceeds the threshold" in result.message
Comment on lines +549 to +570
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment in _tables.py about these column names and whether we should keep these tests



def test_check_table_time_columns_duration_no_time_columns():
"""Test that tables without time columns pass."""
table = DynamicTable(name="test_table", description="test")
table.add_column(name="value", description="some data")
table.add_row(value=123)
table.add_row(value=456)

assert check_table_time_columns_duration(table) is None
Loading