Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### New Checks
* Added `check_file_extension` for NWB file extension best practice recommendations (`.nwb`, `.nwb.h5`, or `.nwb.zarr`) [#625](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/625)
* Added `check_units_table_duration` to detect whether the duration spanned by the `spike_times` of a Units table exceeds a threshold (default: 1 year), which may indicate that `spike_times` are in the wrong units or that there is a data quality issue.


### Improvements
* Added documentation to API and CLI docs on how to use the dandi config option. [#624](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/624)
Expand Down
9 changes: 9 additions & 0 deletions docs/best_practices/ecephys.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,18 @@ The ``ElectrodeTable`` should not contain redundant information that is present

As a concrete example, objects from the `SpikeInterface <https://spikeinterface.readthedocs.io/en/latest/>`__ package contain two properties named ``gain_to_uv`` and ``offset_to_uv`` that are used to convert the raw data to microvolts. These properties should not be stored in the ``ElectrodeTable`` but rather in the ``ElectricalSeries`` object as ``channel_conversion`` and ``offset`` respectively.

.. _best_practice_units_table_duration:

Units Table
-----------

The Units Table contains information about the identified units (putative neurons) from extracellular electrophysiology data.

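The spike times of each unit are stored, in seconds, in the ``spike_times`` column of the table. If the time between the earliest and the latest spike in the table is unrealistically long (by default, more than one year), the spike times may have been stored in the wrong units (e.g., milliseconds instead of seconds) or there may be another data quality issue.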

Check function: :py:meth:`~nwbinspector.checks._ecephys.check_units_table_duration`


.. _best_practice_negative_spike_times:

Negative Spike Times
Expand Down
2 changes: 2 additions & 0 deletions src/nwbinspector/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
check_electrical_series_reference_electrodes_table,
check_negative_spike_times,
check_spike_times_not_in_unobserved_interval,
check_units_table_duration,
)
from ._general import (
check_description,
Expand Down Expand Up @@ -142,6 +143,7 @@
"check_time_intervals_stop_after_start",
"check_table_values_for_dict",
"check_table_time_columns_are_not_negative",
"check_units_table_duration",
"check_resolution",
"check_missing_unit",
"check_regular_timestamps",
Expand Down
70 changes: 70 additions & 0 deletions src/nwbinspector/checks/_ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from ..utils import get_data_shape

NELEMS = 200
# Default duration threshold in seconds: one year of 365.25 days (= 31557600.0 s)
DURATION_THRESHOLD = 365.25 * 24 * 60 * 60


@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=Units)
Expand Down Expand Up @@ -140,3 +142,71 @@ def check_ascending_spike_times(units_table: Units, nelems: Optional[int] = NELE
)
)
return None


@register_check(importance=Importance.CRITICAL, neurodata_type=Units)
def check_units_table_duration(
    units: Units, duration_threshold: float = DURATION_THRESHOLD
) -> Optional[InspectorMessage]:
    """
    Check if the duration of spike times in a Units table exceeds a threshold.

    This check helps identify potential issues where spike times may have been stored
    in the wrong units (e.g., milliseconds instead of seconds) or have other data
    quality issues that result in an unrealistically long recording duration.

    The duration is the difference between the largest and the smallest spike time
    found across all units of the table.

    Best Practice :ref:`best_practice_units_table_duration`

    Parameters
    ----------
    units : Units
        The Units table to check.
    duration_threshold : float, optional
        The duration threshold in seconds. If the duration exceeds this value,
        an InspectorMessage is returned. Default is 1 year (31557600 seconds).

    Returns
    -------
    Optional[InspectorMessage]
        An InspectorMessage if the duration exceeds the threshold, None otherwise.
        None is also returned when the table has no ``spike_times`` column or when
        no unit has any spikes.
    """
    if "spike_times" not in units:
        return None

    # ``units["spike_times"]`` is the index of the ragged column; its ``target`` holds the
    # spike times of all units concatenated, which is all that is needed for a global min/max.
    spike_times = np.asarray(units["spike_times"].target.data[:])

    # Guard before any indexing or reduction: a table whose units all have zero spikes
    # (or that has no rows) has no duration to evaluate.
    if spike_times.size == 0:
        return None

    start = float(np.min(spike_times))
    end = float(np.max(spike_times))
    duration = end - start

    if duration <= duration_threshold:
        return None

    # Express both values in years as well, purely to make the message easier to read
    seconds_per_year = 31557600.0
    duration_years = duration / seconds_per_year
    threshold_years = duration_threshold / seconds_per_year
    return InspectorMessage(
        message=(
            f"Units table has a duration of {duration:.2f} seconds "
            f"({duration_years:.2f} years), which exceeds the threshold of "
            f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). "
            "This may indicate that spike_times are in the wrong units or there is a data quality issue."
        )
    )
98 changes: 98 additions & 0 deletions tests/unit_tests/test_ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
check_electrical_series_reference_electrodes_table,
check_negative_spike_times,
check_spike_times_not_in_unobserved_interval,
check_units_table_duration,
)


Expand Down Expand Up @@ -312,3 +313,100 @@ def test_ascending_spike_times_empty(self):
def test_ascending_spike_times_nelems(self):
    """With ``nelems=2`` the check reports nothing for a train whose third spike is out of order."""
    # Ascending for the first two entries; only the third one breaks the order
    train = [0.0, 0.1, 0.05]
    self.units_table.add_unit(spike_times=train)

    outcome = check_ascending_spike_times(units_table=self.units_table, nelems=2)
    assert outcome is None


def test_check_units_table_duration_pass():
    """A Units table whose spike times span only a few seconds produces no message."""
    table = Units(name="units")
    for train in ([0.0, 1.0, 2.0], [0.5, 1.5, 3.0]):
        table.add_unit(spike_times=train)

    outcome = check_units_table_duration(table)
    assert outcome is None


def test_check_units_table_duration_pass_empty_spike_times():
    """A Units table populated without any spike times produces no message."""
    table = Units(name="units")
    # The only row added sets a custom column; no spike_times are supplied
    table.add_column(name="custom_col", description="test")
    table.add_row(custom_col=1)

    outcome = check_units_table_duration(table)
    assert outcome is None


def test_check_units_table_duration_fail():
    """A Units table spanning more than the default one-year threshold is reported."""
    table = Units(name="units")
    table.add_unit(spike_times=[0.0, 1.0, 2.0])
    # The last spike lies at 40,000,000 s (~1.27 years), beyond 31557600 s
    table.add_unit(spike_times=[0.5, 1.5, 40000000.0])

    expected_text = (
        "Units table has a duration of 40000000.00 seconds "
        "(1.27 years), which exceeds the threshold of "
        "31557600.00 seconds (1.00 years). "
        "This may indicate that spike_times are in the wrong units or there is a data quality issue."
    )
    expected = InspectorMessage(
        message=expected_text,
        importance=Importance.CRITICAL,
        check_function_name="check_units_table_duration",
        object_type="Units",
        object_name="units",
        location="/",
    )

    assert check_units_table_duration(table) == expected


def test_check_units_table_duration_custom_threshold():
    """The same table passes with the default threshold and fails with a 50 second one."""
    table = Units(name="units")
    table.add_unit(spike_times=[0.0, 100.0])  # spans 100 seconds

    # Default threshold: 100 seconds is far below it
    default_outcome = check_units_table_duration(table)
    assert default_outcome is None

    # Custom threshold of 50 seconds: the 100 second span must be reported
    expected_text = (
        "Units table has a duration of 100.00 seconds "
        "(0.00 years), which exceeds the threshold of "
        "50.00 seconds (0.00 years). "
        "This may indicate that spike_times are in the wrong units or there is a data quality issue."
    )
    expected = InspectorMessage(
        message=expected_text,
        importance=Importance.CRITICAL,
        check_function_name="check_units_table_duration",
        object_type="Units",
        object_name="units",
        location="/",
    )
    strict_outcome = check_units_table_duration(table, duration_threshold=50.0)
    assert strict_outcome == expected


def test_check_units_table_duration_single_unit():
    """An excessive duration is reported even when the table holds a single unit."""
    table = Units(name="units")
    table.add_unit(spike_times=[0.0, 50000000.0])  # ~1.58 years

    outcome = check_units_table_duration(table)

    assert outcome is not None
    expected_fragment = "Units table has a duration of 50000000.00 seconds"
    assert expected_fragment in outcome.message


def test_check_units_table_duration_first_unit_no_spikes():
    """A leading unit without spikes does not disturb the duration computation."""
    table = Units(name="units")
    # Empty train first, then a unit spanning 2 seconds
    for train in ([], [0.0, 1.0, 2.0]):
        table.add_unit(spike_times=train)

    outcome = check_units_table_duration(table)
    assert outcome is None


def test_check_units_table_duration_second_unit_no_spikes():
    """A trailing unit without spikes does not disturb the duration computation."""
    table = Units(name="units")
    # A unit spanning 2 seconds first, then an empty train
    for train in ([0.0, 1.0, 2.0], []):
        table.add_unit(spike_times=train)

    outcome = check_units_table_duration(table)
    assert outcome is None
Loading