-
Notifications
You must be signed in to change notification settings - Fork 11
Add check for duration of time columns in DynamicTable #628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 8 commits
8475f38
ccef30b
ff0b4c9
fdbceef
e7b4045
8d91ae1
90de1a9
c482752
a011d44
8d0c28b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -292,3 +292,90 @@ def check_table_time_columns_are_not_negative(table: DynamicTable) -> Optional[I | |||||||||||||||||||||
| ) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| return None | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| @register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable) | ||||||||||||||||||||||
| def check_table_time_columns_duration( | ||||||||||||||||||||||
| table: DynamicTable, duration_threshold: float = 31557600.0 | ||||||||||||||||||||||
| ) -> Optional[InspectorMessage]: | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| Check if the duration spanned by time columns in a DynamicTable exceeds a threshold. | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| This check examines time-related columns (start_time, stop_time, timestamp, spike_times) | ||||||||||||||||||||||
| and calculates the duration as max(time) - min(time). If this exceeds the threshold | ||||||||||||||||||||||
| (default: 1 year = 31,557,600 seconds), a warning is issued. | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Parameters | ||||||||||||||||||||||
| ---------- | ||||||||||||||||||||||
| table: DynamicTable | ||||||||||||||||||||||
| The table to check | ||||||||||||||||||||||
| duration_threshold: float, optional | ||||||||||||||||||||||
| Maximum expected duration in seconds. Default is 1 year (365.25 days). | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Returns | ||||||||||||||||||||||
| ------- | ||||||||||||||||||||||
| Optional[InspectorMessage] | ||||||||||||||||||||||
| Warning message if duration exceeds threshold, None otherwise | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| if len(table.id) == 0: | ||||||||||||||||||||||
| return None # Empty table | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| start_times = [] | ||||||||||||||||||||||
| end_times = [] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Check for start_time and stop_time columns (e.g., trials) | ||||||||||||||||||||||
| if "start_time" in table.colnames and len(table["start_time"]) > 0: | ||||||||||||||||||||||
| start_times.append(float(table["start_time"][0])) | ||||||||||||||||||||||
| if "stop_time" in table.colnames and len(table["stop_time"]) > 0: | ||||||||||||||||||||||
| end_times.append(float(table["stop_time"][-1])) | ||||||||||||||||||||||
|
Comment on lines
+328
to
+330
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could also check max/min of all start and stop times if we don't want to assume that these columns are also following ascending order best practices |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Check for timestamp column (possibly with duration) | ||||||||||||||||||||||
| if "timestamp" in table.colnames and len(table["timestamp"]) > 0: | ||||||||||||||||||||||
| timestamp_data = table["timestamp"] | ||||||||||||||||||||||
| start_times.append(float(timestamp_data[0])) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if "duration" in table.colnames and len(table["duration"]) > 0: | ||||||||||||||||||||||
| duration_data = table["duration"] | ||||||||||||||||||||||
| end_times.append(float(timestamp_data[-1] + duration_data[-1])) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| end_times.append(float(timestamp_data[-1])) | ||||||||||||||||||||||
|
Comment on lines
+332
to
+341
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we recommend storing data this way somewhere? This seems relatively arbitrary to look for (vs. the
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are some other inspector checks that search for column names ending with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is for ndx-events. I should have mentioned that somewhere, huh? |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Check for spike_times column (Units table) | ||||||||||||||||||||||
| # Assume spike times are ordered within each unit | ||||||||||||||||||||||
| if "spike_times" in table.colnames and len(table["spike_times"]) > 0: | ||||||||||||||||||||||
| idxs = table["spike_times"].data[:] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Remove zeros from idxs (units with no spikes) | ||||||||||||||||||||||
| idxs = idxs[idxs != 0] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if len(idxs) > 0: | ||||||||||||||||||||||
| st_data = table["spike_times"].target | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if len(idxs) > 1: | ||||||||||||||||||||||
| start = float(np.min(np.r_[st_data[0], st_data[idxs[:-1]]])) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| start = float(st_data[0]) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| end = float(np.max(st_data[idxs - 1])) | ||||||||||||||||||||||
| start_times.append(start) | ||||||||||||||||||||||
| end_times.append(end) | ||||||||||||||||||||||
bendichter marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Calculate duration if we found any time data | ||||||||||||||||||||||
| if start_times and end_times: | ||||||||||||||||||||||
bendichter marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||
| duration = max(end_times) - min(start_times) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Check if duration exceeds threshold | ||||||||||||||||||||||
| if duration > duration_threshold: | ||||||||||||||||||||||
| # Convert to years for the message | ||||||||||||||||||||||
| duration_years = duration / 31557600.0 | ||||||||||||||||||||||
| threshold_years = duration_threshold / 31557600.0 | ||||||||||||||||||||||
| return InspectorMessage( | ||||||||||||||||||||||
| message=( | ||||||||||||||||||||||
| f"DynamicTable '{table.name}' has a duration of {duration:.2f} seconds " | ||||||||||||||||||||||
| f"({duration_years:.2f} years), which exceeds the threshold of " | ||||||||||||||||||||||
| f"{duration_threshold:.2f} seconds ({threshold_years:.2f} years). " | ||||||||||||||||||||||
| "Please verify that this is correct." | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| return None | ||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| check_ids_unique, | ||
| check_single_row, | ||
| check_table_time_columns_are_not_negative, | ||
| check_table_time_columns_duration, | ||
| check_table_values_for_dict, | ||
| check_time_interval_time_columns, | ||
| check_time_intervals_stop_after_start, | ||
|
|
@@ -498,3 +499,82 @@ def test_table_time_columns_are_not_negative_multidimensional_pass(): | |
| test_table.add_row(test_time=[0.0, 1.0, 2.0, 3.0]) | ||
|
|
||
| assert check_table_time_columns_are_not_negative(test_table) is None | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_pass_short(): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add tests for the Units table / spike_times case? |
||
| """Test that short duration tables pass the check.""" | ||
| table = TimeIntervals(name="trials", description="test trials") | ||
| table.add_row(start_time=0.0, stop_time=10.0) | ||
| table.add_row(start_time=15.0, stop_time=25.0) | ||
| table.add_row(start_time=30.0, stop_time=100.0) | ||
|
|
||
| assert check_table_time_columns_duration(table) is None | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_fail_exceeds_threshold(): | ||
| """Test that tables with duration exceeding 1 year fail.""" | ||
| one_year = 31557600.0 | ||
| table = TimeIntervals(name="trials", description="test trials") | ||
| table.add_row(start_time=0.0, stop_time=100.0) | ||
| table.add_row(start_time=one_year + 1000, stop_time=one_year + 2000) | ||
|
|
||
| result = check_table_time_columns_duration(table) | ||
| assert result is not None | ||
| assert "trials" in result.message | ||
| assert "exceeds the threshold" in result.message | ||
| assert result.importance == Importance.BEST_PRACTICE_SUGGESTION | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_pass_empty(): | ||
| """Test that empty tables pass.""" | ||
| table = TimeIntervals(name="trials", description="test trials") | ||
| assert check_table_time_columns_duration(table) is None | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_pass_custom_threshold(): | ||
| """Test that custom threshold works correctly.""" | ||
| table = TimeIntervals(name="trials", description="test trials") | ||
| table.add_row(start_time=0.0, stop_time=100.0) | ||
| table.add_row(start_time=150.0, stop_time=200.0) | ||
|
|
||
| # Should fail with 100 second threshold | ||
| result = check_table_time_columns_duration(table, duration_threshold=100.0) | ||
| assert result is not None | ||
|
|
||
| # Should pass with 300 second threshold | ||
| result = check_table_time_columns_duration(table, duration_threshold=300.0) | ||
| assert result is None | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_with_timestamp(): | ||
| """Test with timestamp column.""" | ||
| table = DynamicTable(name="events", description="test events") | ||
| table.add_column(name="timestamp", description="event timestamps") | ||
| table.add_row(timestamp=0.0) | ||
| table.add_row(timestamp=100.0) | ||
|
|
||
| assert check_table_time_columns_duration(table) is None | ||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_with_timestamp_and_duration(): | ||
| """Test with timestamp and duration columns.""" | ||
| one_year = 31557600.0 | ||
| table = DynamicTable(name="events", description="test events") | ||
| table.add_column(name="timestamp", description="event timestamps") | ||
| table.add_column(name="duration", description="event durations") | ||
| table.add_row(timestamp=0.0, duration=10.0) | ||
| table.add_row(timestamp=one_year, duration=1000.0) | ||
|
|
||
| result = check_table_time_columns_duration(table) | ||
| assert result is not None | ||
| assert "exceeds the threshold" in result.message | ||
|
Comment on lines
+549
to
+570
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment in |
||
|
|
||
|
|
||
| def test_check_table_time_columns_duration_no_time_columns(): | ||
| """Test that tables without time columns pass.""" | ||
| table = DynamicTable(name="test_table", description="test") | ||
| table.add_column(name="value", description="some data") | ||
| table.add_row(value=123) | ||
| table.add_row(value=456) | ||
|
|
||
| assert check_table_time_columns_duration(table) is None | ||
Uh oh!
There was an error while loading. Please reload this page.