Skip to content

Commit 58c3d94

Browse files
authored
Fix apostrophe bug in spikeinterface module and improve memory efficiency (#1666)
1 parent b60ea98 commit 58c3d94

File tree

3 files changed

+41
-10
lines changed

3 files changed

+41
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Fixed bug in `write_imaging_to_nwbfile` where `nwbfile` was incorrectly passed to `add_imaging_to_nwbfile` instead of the created/loaded nwbfile. [PR #1649](https://github.com/catalystneuro/neuroconv/pull/1649)
1111
* Fixed bug in `write_segmentation_to_nwbfile` where invalid `plane_num` parameter was passed to `add_segmentation_to_nwbfile`. [PR #1649](https://github.com/catalystneuro/neuroconv/pull/1649)
1212
* Fixed `get_json_schema_from_method_signature` to skip `*args` (VAR_POSITIONAL) parameters, which was causing schema validation errors when methods used the `*args` pattern for deprecating positional arguments. [PR #1647](https://github.com/catalystneuro/neuroconv/pull/1647)
13+
* Fixed a bug when adding a units table to an `nwbfile` where `unit_name` values containing apostrophes could fail matching due to pandas `to_dataframe().query(...)` parsing. Replaced query-based matching with a direct `unit_name` column mapping, which also improves memory efficiency as a side effect. Added regression coverage for quoted unit names. [PR #1666](https://github.com/catalystneuro/neuroconv/pull/1666)
1314

1415
## Features
1516
* Added `roi_ids_to_add` parameter to `BaseSegmentationExtractorInterface.add_to_nwbfile()` to select a subset of ROIs during conversion, reducing file size by excluding rejected or unwanted ROIs. Also added `roi_ids` property to inspect available ROI IDs. Requires roiextractors >= 0.8.0. [PR #1658](https://github.com/catalystneuro/neuroconv/pull/1658)

src/neuroconv/tools/spikeinterface/spikeinterface.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,10 +1045,10 @@ def add_electrodes_to_nwbfile(
10451045
channel_map = _build_channel_id_to_electrodes_table_map(recording=recording, nwbfile=nwbfile)
10461046

10471047
# Get indices where this recording's data goes (all should be found now)
1048-
all_indices = np.arange(electrode_table_size)
10491048
channel_ids = recording.get_channel_ids()
10501049
indices_for_new_data = [channel_map[channel_id] for channel_id in channel_ids]
1051-
indices_for_null_values = [index for index in all_indices if index not in indices_for_new_data]
1050+
new_indices_set = set(indices_for_new_data)
1051+
indices_for_null_values = [index for index in range(electrode_table_size) if index not in new_indices_set]
10521052
extending_column = len(indices_for_null_values) > 0
10531053

10541054
# Add properties as columns (exclude channel_name and electrode_name as they were handled above)
@@ -2173,14 +2173,17 @@ def _add_units_table_to_nwbfile(
21732173
cols_args["data"] = extended_data
21742174
units_table.add_column("unit_name", **cols_args)
21752175

2176-
# Build a channel name to electrode table index map
2177-
table_df = units_table.to_dataframe().reset_index()
2178-
unit_name_to_electrode_index = {
2179-
unit_name: table_df.query(f"unit_name=='{unit_name}'").index[0] for unit_name in unit_name_array
2180-
}
2176+
# Build a unit_name to units table row index map directly from the table column.
2177+
# This avoids materializing a pandas DataFrame and sidesteps query parsing/casting pitfalls.
2178+
unit_names_in_table = units_table["unit_name"][:]
2179+
unit_name_to_electrode_index = {}
2180+
for index, unit_name in enumerate(unit_names_in_table):
2181+
if unit_name not in unit_name_to_electrode_index:
2182+
unit_name_to_electrode_index[unit_name] = index
21812183

21822184
indices_for_new_data = [unit_name_to_electrode_index[unit_name] for unit_name in unit_name_array]
2183-
indices_for_null_values = table_df.index.difference(indices_for_new_data).values
2185+
new_indices_set = set(indices_for_new_data)
2186+
indices_for_null_values = [index for index in range(unit_table_size) if index not in new_indices_set]
21842187
extending_column = len(indices_for_null_values) > 0
21852188

21862189
# Add properties as columns
@@ -2871,8 +2874,11 @@ def _get_electrode_group_indices(recording, nwbfile):
28712874
if group_names is None:
28722875
electrode_group_indices = None
28732876
else:
2874-
group_names = [str(group_name) for group_name in group_names]
2875-
electrode_group_indices = nwbfile.electrodes.to_dataframe().query(f"group_name in {group_names}").index.values
2877+
group_names_set = {str(group_name) for group_name in group_names}
2878+
table_group_names = nwbfile.electrodes["group_name"][:]
2879+
electrode_group_indices = np.array(
2880+
[index for index, group_name in enumerate(table_group_names) if str(group_name) in group_names_set]
2881+
)
28762882
return electrode_group_indices
28772883

28782884

tests/test_modalities/test_ecephys/test_tools_spikeinterface.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,6 +1673,30 @@ def test_non_overwriting_unit_names_sorting_property(self):
16731673
unit_names_in_units_table = list(self.nwbfile.units["unit_name"].data)
16741674
self.assertListEqual(unit_names_in_units_table, expected_unit_names_in_units_table)
16751675

1676+
def test_property_matching_by_unit_name_with_quotes(self):
1677+
"""Ensure matching by unit_name works when names contain apostrophes.
1678+
1679+
This test was added in PR #1666, which removed pandas DataFrame/query matching
1680+
(`to_dataframe().query(...)`) from units-table extension logic. It protects
1681+
against regressions back to query-string-based matching (e.g., `pandas.query`),
1682+
which can fail or mis-parse when unit_name contains quotes.
1683+
"""
1684+
quoted_unit_names = ["unit'a", "unit'b", "unit'c", "unit'd"]
1685+
self.sorting_1.set_property(key="unit_name", values=quoted_unit_names)
1686+
self.sorting_1.set_property(key="property", values=["value_a", "value_b", "value_c", "value_d"])
1687+
1688+
add_sorting_to_nwbfile(sorting=self.sorting_1, nwbfile=self.nwbfile)
1689+
1690+
self.sorting_2.set_property(key="unit_name", values=["unit'c", "unit'd", "unit'e", "unit'f"])
1691+
self.sorting_2.set_property(key="property", values=["value_c2", "value_d2", "value_e", "value_f"])
1692+
1693+
add_sorting_to_nwbfile(sorting=self.sorting_2, nwbfile=self.nwbfile)
1694+
1695+
expected_unit_names = ["unit'a", "unit'b", "unit'c", "unit'd", "unit'e", "unit'f"]
1696+
expected_property_values = ["value_a", "value_b", "value_c", "value_d", "value_e", "value_f"]
1697+
self.assertListEqual(list(self.nwbfile.units["unit_name"].data), expected_unit_names)
1698+
self.assertListEqual(list(self.nwbfile.units["property"].data), expected_property_values)
1699+
16761700
def test_integer_unit_names_overwrite(self):
16771701
"""Ensure unit names merge correctly after appending when unit names are integers."""
16781702
unit_ids = self.base_sorting.get_unit_ids()

0 commit comments

Comments
 (0)