Skip to content

Commit cff697b

Browse files
Convert behavior only recordings (#134)
* behavior_only option * docstring and notebook update * add test and better error message * update data cache key to force new download * suggestions from review * Apply suggestion from @Copilot Co-authored-by: Copilot <[email protected]> --------- Co-authored-by: Copilot <[email protected]>
1 parent 6dcbc97 commit cff697b

File tree

9 files changed

+140
-32
lines changed

9 files changed

+140
-32
lines changed

.github/workflows/test_package_build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ jobs:
104104
with:
105105
# Path to cache
106106
path: ${{ env.DOWNLOAD_DIR }}
107-
# Cache key: OS + static string + version number (bump v1 to v2 to invalidate)
108-
key: ${{ runner.os }}-testdata-trodes-v1
107+
# Cache key: OS + static string + version number (bump v2 to v3 to invalidate)
108+
key: ${{ runner.os }}-testdata-trodes-v2
109109

110110
# --- Download Test Data Step (Conditional) ---
111111
- name: Download test rec files

notebooks/conversion_tutorial.ipynb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@
4848
"Where the nwb file will be saved. For Frank Lab members, this should be `/stelmo/nwb/raw/`. File is named by the convention {animal}{date}.nwb\n",
4949
"\n",
5050
"+ `query_expression`: A query expression to select which files to convert. For example, if you have several animals in your folder, you could write `\"animal == 'sample'\"` to select only the sample animal. Defaults to `None` which converts all files in the directory (potentially overwriting ones you've done before!).\n",
51+
"\n",
52+
"+ `behavior_only`: Rec files recorded through trodes software without e-phys data have a\n",
53+
" different expected internal structure. Use this flag to ensure correct data parsing \n",
54+
" is used. Default of `False` expects electrophysiology data to be present.\n",
5155
"\n"
5256
]
5357
},

src/trodes_to_nwb/convert.py

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def create_nwbs(
112112
n_workers: int = 1,
113113
query_expression: str | None = None,
114114
disable_ptp: bool = False,
115+
behavior_only: bool = False,
115116
):
116117
"""
117118
Convert SpikeGadgets data to NWB format.
@@ -138,6 +139,9 @@ def create_nwbs(
138139
See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html.
139140
disable_ptp : bool, optional
140141
Blocks use of ptp timestamps regardless of rec header, by default False.
142+
behavior_only : bool, optional
143+
Flag to indicate only behavioral data (no ephys) was collected in the rec
144+
files, by default False.
141145
142146
"""
143147

@@ -166,6 +170,7 @@ def pass_func(args):
166170
output_dir,
167171
video_directory,
168172
convert_video,
173+
behavior_only=behavior_only,
169174
)
170175
return True
171176
except Exception as e:
@@ -194,6 +199,7 @@ def pass_func(args):
194199
video_directory,
195200
convert_video,
196201
disable_ptp,
202+
behavior_only=behavior_only,
197203
)
198204

199205

@@ -206,6 +212,7 @@ def _create_nwb(
206212
video_directory: str = "",
207213
convert_video: bool = False,
208214
disable_ptp: bool = False,
215+
behavior_only: bool = False,
209216
):
210217
# create loggers
211218
logger = setup_logger("convert", f"{session[1]}{session[0]}_convert.log")
@@ -217,7 +224,10 @@ def _create_nwb(
217224
logger.info("CREATING REC DATA ITERATORS")
218225
# make generic rec file data chunk iterator to pass to functions
219226
rec_dci = RecFileDataChunkIterator(
220-
rec_filepaths, interpolate_dropped_packets=False, stream_id="trodes"
227+
rec_filepaths,
228+
interpolate_dropped_packets=False,
229+
stream_id="ECU_analog" if behavior_only else "trodes",
230+
behavior_only=behavior_only,
221231
)
222232
rec_dci_timestamps = (
223233
rec_dci.timestamps
@@ -263,30 +273,36 @@ def _create_nwb(
263273
add_acquisition_devices(nwb_file, metadata)
264274
add_tasks(nwb_file, metadata)
265275
add_associated_files(nwb_file, metadata)
266-
add_electrode_groups(
267-
nwb_file, metadata, device_metadata, hw_channel_map, ref_electrode_map
268-
)
269276
add_header_device(nwb_file, rec_header)
270277
add_associated_video_files(
271278
nwb_file, metadata, session_df, video_directory, convert_video
272279
)
273280
add_optogenetics(nwb_file, metadata, device_metadata)
274281

275-
logger.info("ADDING EPHYS DATA")
276-
# add rec file data
277-
map_row_ephys_data_to_row_electrodes_table = list(
278-
range(len(nwb_file.electrodes))
279-
) # TODO: Double check this
280-
add_raw_ephys(
281-
nwb_file,
282-
rec_filepaths,
283-
map_row_ephys_data_to_row_electrodes_table,
284-
metadata,
285-
)
282+
if not behavior_only:
283+
add_electrode_groups(
284+
nwb_file, metadata, device_metadata, hw_channel_map, ref_electrode_map
285+
)
286+
logger.info("ADDING EPHYS DATA")
287+
# add rec file data
288+
map_row_ephys_data_to_row_electrodes_table = list(
289+
range(len(nwb_file.electrodes))
290+
) # TODO: Double check this
291+
add_raw_ephys(
292+
nwb_file,
293+
rec_filepaths,
294+
map_row_ephys_data_to_row_electrodes_table,
295+
metadata,
296+
)
286297
logger.info("ADDING DIO DATA")
287298
add_dios(nwb_file, rec_filepaths, metadata)
288299
logger.info("ADDING ANALOG DATA")
289-
add_analog_data(nwb_file, rec_filepaths, timestamps=rec_dci_timestamps)
300+
add_analog_data(
301+
nwb_file,
302+
rec_filepaths,
303+
timestamps=rec_dci_timestamps,
304+
behavior_only=behavior_only,
305+
)
290306
logger.info("ADDING SAMPLE COUNTS")
291307
add_sample_count(nwb_file, rec_dci)
292308
logger.info("ADDING EPOCHS")

src/trodes_to_nwb/convert_analog.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515

1616

1717
def add_analog_data(
18-
nwbfile: NWBFile, rec_file_path: list[str], timestamps: np.ndarray = None, **kwargs
18+
nwbfile: NWBFile,
19+
rec_file_path: list[str],
20+
timestamps: np.ndarray = None,
21+
behavior_only: bool = False,
22+
**kwargs,
1923
) -> None:
2024
"""Adds analog streams to the nwb file.
2125
@@ -46,9 +50,10 @@ def add_analog_data(
4650
rec_dci = RecFileDataChunkIterator(
4751
rec_file_path,
4852
nwb_hw_channel_order=analog_channel_ids,
49-
stream_index=2,
53+
stream_id="ECU_analog",
5054
is_analog=True,
5155
timestamps=timestamps,
56+
behavior_only=behavior_only,
5257
)
5358

5459
# add headstage channel IDs to the list of analog channel IDs

src/trodes_to_nwb/convert_ephys.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def __init__(
4747
is_analog: bool = False,
4848
interpolate_dropped_packets: bool = False,
4949
timestamps=None, # Use this if you already have timestamps from initializing another rec iterator on the same files
50+
behavior_only: bool = False,
5051
**kwargs,
5152
):
5253
"""
@@ -69,6 +70,8 @@ def __init__(
6970
whether to interpolate single dropped packets, by default False
7071
timestamps : [type], optional
7172
timestamps to use. Can provide efficiency improvements by skipping recalculating timestamps from rec files, by default None
73+
behavior_only : bool, optional
74+
indicate if file contains only behavior data (no e-phys), by default False
7275
kwargs : dict
7376
additional arguments to pass to GenericDataChunkIterator
7477
"""
@@ -94,7 +97,15 @@ def __init__(
9497
# trodes
9598
assert all([neo_io.block_count() == 1 for neo_io in self.neo_io])
9699
assert all([neo_io.segment_count(0) == 1 for neo_io in self.neo_io])
97-
assert all([neo_io.signal_streams_count() == 4 for neo_io in self.neo_io])
100+
assert all(
101+
[
102+
neo_io.signal_streams_count() == 4 - behavior_only
103+
for neo_io in self.neo_io
104+
]
105+
), (
106+
"Unexpected number of signal streams. "
107+
+ "Confirm whether behavior_only is set correctly for this recording"
108+
)
98109

99110
self.block_index = 0
100111
self.seg_index = 0
@@ -113,6 +124,10 @@ def __init__(
113124
else: # if stream id is not provided
114125
stream_id = self.neo_io[0].get_stream_id_from_index(stream_index)
115126

127+
if behavior_only and stream_id == "trodes":
128+
raise ValueError(
129+
"Behavior only recordings do not contain a `trodes` stream"
130+
)
116131
self.stream_id = stream_id
117132
self.stream_index = stream_index
118133

src/trodes_to_nwb/convert_optogenetics.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,15 @@ def add_optogenetics(nwbfile: NWBFile, metadata: dict, device_metadata: List[dic
3232
List of dictionaries containing metadata for devices used in the experiment.
3333
"""
3434
logger = logging.getLogger("convert")
35-
if not all(
36-
[
37-
x in metadata
38-
for x in [
39-
"virus_injection",
40-
"opto_excitation_source",
41-
"optical_fiber",
42-
"optogenetic_stimulation_software",
43-
]
44-
]
35+
necessary_metadata = [
36+
"virus_injection",
37+
"opto_excitation_source",
38+
"optical_fiber",
39+
"optogenetic_stimulation_software",
40+
]
41+
42+
if not (
43+
all([((x in metadata) and len(metadata[x]) > 0) for x in necessary_metadata])
4544
):
4645
logger.info("No available optogenetic metadata")
4746
return

src/trodes_to_nwb/convert_position.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,12 @@ def add_associated_video_files(
12501250
epoch = video_metadata["task_epochs"][0]
12511251
# get the video file path
12521252
video_path = None
1253-
for file in session_df[session_df.file_extension == ".h264"].full_path:
1253+
for file in session_df[
1254+
np.logical_or(
1255+
session_df.file_extension == ".h264",
1256+
session_df.file_extension == ".mp4",
1257+
)
1258+
].full_path:
12541259
if video_metadata["name"].rsplit(".", 1)[0] in file:
12551260
video_path = file
12561261
break

src/trodes_to_nwb/data_scanner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"rec", # binary file containing the ephys recording, accelerometer, gyroscope, magnetometer, DIO data, header
1313
"videoPositionTracking", # trodes tracked position
1414
"h264", # video file
15+
"mp4", # video file
1516
"cameraHWSync", # position timestamps
1617
"stateScriptLog", # state script controls the experimenter parameters
1718
"yml", # metadata file
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import pytest
2+
3+
from trodes_to_nwb.convert_ephys import RecFileDataChunkIterator
4+
from trodes_to_nwb.tests.utils import data_path
5+
6+
7+
def test_behavior_only_rec_file():
8+
file = data_path / "behavior_only.rec"
9+
# accessing trodes stream with behavior only file should fail
10+
with pytest.raises(IndexError):
11+
RecFileDataChunkIterator(
12+
rec_file_path=[file],
13+
interpolate_dropped_packets=True,
14+
stream_id="trodes",
15+
behavior_only=True,
16+
)
17+
18+
# misspecification of behavior-only should result in mismatched stream number
19+
with pytest.raises(AssertionError):
20+
RecFileDataChunkIterator(
21+
rec_file_path=[file],
22+
interpolate_dropped_packets=True,
23+
stream_id="trodes",
24+
behavior_only=False,
25+
)
26+
27+
# correctly build iterator
28+
rec_dci = RecFileDataChunkIterator(
29+
rec_file_path=[file],
30+
interpolate_dropped_packets=True,
31+
stream_id="ECU_analog",
32+
behavior_only=True,
33+
)
34+
neo_io = rec_dci.neo_io[0]
35+
36+
# check file streams
37+
stream_names = [stream[0] for stream in neo_io.header["signal_streams"]]
38+
assert all(
39+
[
40+
x in stream_names
41+
for x in ["ECU_analog", "ECU_digital", "Controller_DIO_digital"]
42+
]
43+
), "missing expected stream in iterator"
44+
assert "trodes" not in stream_names, "unexpected trodes stream in iterator"
45+
46+
# check data accesses
47+
assert rec_dci.timestamps.size == 433012
48+
assert rec_dci.timestamps[-1] == 1751195974.5656028, "unexpected last timestamp"
49+
assert set(neo_io.multiplexed_channel_xml.keys()) == set(
50+
[
51+
"Headstage_AccelX",
52+
"Headstage_AccelY",
53+
"Headstage_AccelZ",
54+
"Headstage_GyroX",
55+
"Headstage_GyroY",
56+
"Headstage_GyroZ",
57+
"Headstage_MagX",
58+
"Headstage_MagY",
59+
"Headstage_MagZ",
60+
"Controller_Ain1",
61+
]
62+
)
63+
assert neo_io._raw_memmap.shape == (433012, 54)

0 commit comments

Comments
 (0)