Skip to content

Commit a666e8d

Browse files
Copilotedeno
andcommitted
Implement headstage sensor data separation with units and scaling
Co-authored-by: edeno <[email protected]>
1 parent 75cf148 commit a666e8d

File tree

7 files changed

+564
-45
lines changed

7 files changed

+564
-45
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
experimenter_name:
2+
- lastname, firstname
3+
lab: Loren Frank Lab
4+
institution: UCSF
5+
experiment_description: Test Conversion with Headstage Sensor Units
6+
session_description: test yaml insertion with separate sensor TimeSeries
7+
session_id: "12345"
8+
keywords:
9+
- testing
10+
- headstage_sensors
11+
subject:
12+
description: Long-Evans Rat
13+
genotype: Obese Prone CD Rat
14+
sex: M
15+
species: Rattus norvegicus
16+
subject_id: "54321"
17+
date_of_birth: 2000-01-01T00:00:00.000Z
18+
weight: 100
19+
data_acq_device:
20+
- name: SpikeGadgets
21+
system: SpikeGadgets
22+
amplifier: Intan
23+
adc_circuit: Intan
24+
cameras:
25+
- id: 0
26+
meters_per_pixel: 0.001
27+
manufacturer: Allied Vision
28+
model: model1
29+
lens: lens1
30+
camera_name: test camera 1
31+
tasks:
32+
- task_name: Sleep
33+
task_description: sleeping
34+
task_environment: sleep box
35+
camera_id:
36+
- 0
37+
task_epochs:
38+
- 1
39+
associated_files: []
40+
associated_video_files: []
41+
42+
# Updated units specification with individual sensor units
43+
units:
44+
analog: "unspecified" # Default for uncategorized analog channels
45+
behavioral_events: "unspecified" # Default for DIO events
46+
47+
# Optional: specify custom units for specific sensor types
48+
sensor_units:
49+
accelerometer: "g" # Will override default
50+
gyroscope: "d/s" # Will override default
51+
magnetometer: "unspecified"
52+
analog_input: "V" # For ECU analog inputs
53+
54+
times_period_multiplier: 1
55+
raw_data_to_volts: 1.95e-07
56+
default_header_file_path: /stelmo/sam/test_data
57+
58+
device:
59+
name:
60+
- device1
61+
62+
# Enhanced behavioral_events with individual unit specifications
63+
behavioral_events:
64+
- description: Din1
65+
name: Light_1
66+
comments: Indicator for reward delivery
67+
unit: "unspecified" # Digital events are dimensionless
68+
- description: Din2
69+
name: Light_2
70+
unit: "unspecified"
71+
- description: Dout2
72+
name: Poke_1
73+
unit: "unspecified"
74+
- description: ECU_Ain1
75+
name: Analog_Input_1
76+
comments: Custom analog input channel
77+
unit: "V" # Voltage
78+
- description: ECU_Ain2
79+
name: Analog_Input_2
80+
comments: Another analog input
81+
unit: "mV" # Millivolts
82+
83+
electrode_groups: []
84+
ntrode_electrode_group_channel_map: []

src/trodes_to_nwb/convert.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ def _create_nwb(
309309
rec_filepaths,
310310
timestamps=rec_dci_timestamps,
311311
behavior_only=behavior_only,
312+
metadata=metadata,
312313
)
313314
logger.info("ADDING SAMPLE COUNTS")
314315
add_sample_count(nwb_file, rec_dci)

src/trodes_to_nwb/convert_analog.py

Lines changed: 179 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,230 @@
11
"""Module for handling the conversion of ECU analog and headstage sensor data streams from Trodes .rec files to NWB format."""
22

3+
import re
34
from xml.etree import ElementTree
45

56
import numpy as np
67
import pynwb
78
from hdmf.backends.hdf5 import H5DataIO
8-
from pynwb import NWBFile
9+
from pynwb import NWBFile, TimeSeries
910

1011
from trodes_to_nwb import convert_rec_header
1112
from trodes_to_nwb.convert_ephys import RecFileDataChunkIterator
1213

1314
DEFAULT_CHUNK_TIME_DIM = 16384
1415
DEFAULT_CHUNK_MAX_CHANNEL_DIM = 32
1516

17+
# Sensor type definitions with scaling factors and units
18+
SENSOR_TYPE_CONFIG = {
19+
'accelerometer': {
20+
'pattern': r'Headstage_Accel[XYZ]',
21+
'scaling_factor': 0.000061, # Convert to g units
22+
'unit': 'g',
23+
'description': 'Headstage accelerometer data'
24+
},
25+
'gyroscope': {
26+
'pattern': r'Headstage_Gyro[XYZ]',
27+
'scaling_factor': 0.061, # Convert to degrees/second
28+
'unit': 'd/s',
29+
'description': 'Headstage gyroscope data'
30+
},
31+
'magnetometer': {
32+
'pattern': r'Headstage_Mag[XYZ]',
33+
'scaling_factor': 1.0, # No scaling specified in issue
34+
'unit': 'unspecified',
35+
'description': 'Headstage magnetometer data'
36+
},
37+
'analog_input': {
38+
'pattern': r'(ECU_Ain\d+|Controller_Ain\d+)',
39+
'scaling_factor': 1.0,
40+
'unit': 'unspecified',
41+
'description': 'Analog input channel'
42+
}
43+
}
44+
45+
46+
def _categorize_sensor_channels(channel_names: list[str]) -> dict[str, list[str]]:
47+
"""Categorize sensor channels by type based on naming patterns.
48+
49+
Parameters
50+
----------
51+
channel_names : list[str]
52+
List of channel names to categorize
53+
54+
Returns
55+
-------
56+
dict[str, list[str]]
57+
Dictionary mapping sensor types to lists of channel names
58+
"""
59+
categorized = {}
60+
61+
for sensor_type, config in SENSOR_TYPE_CONFIG.items():
62+
pattern = config['pattern']
63+
matching_channels = [name for name in channel_names if re.match(pattern, name)]
64+
if matching_channels:
65+
categorized[sensor_type] = matching_channels
66+
67+
# Handle uncategorized channels
68+
categorized_flat = [name for channels in categorized.values() for name in channels]
69+
uncategorized = [name for name in channel_names if name not in categorized_flat]
70+
if uncategorized:
71+
categorized['other'] = uncategorized
72+
73+
return categorized
74+
75+
76+
def _create_sensor_timeseries(
    sensor_type: str,
    channel_names: list[str],
    data: np.ndarray,
    timestamps: np.ndarray,
    metadata: dict = None,
) -> TimeSeries:
    """Create a TimeSeries object for a specific sensor type.

    Parameters
    ----------
    sensor_type : str
        Type of sensor (accelerometer, gyroscope, etc.). Used verbatim as the
        TimeSeries name. A leading ``"ecu_"`` is ignored when looking up the
        sensor configuration and custom units, so ``"ecu_analog_input"``
        shares the ``"analog_input"`` settings.
    channel_names : list[str]
        Names of channels for this sensor type; listed in the description.
    data : np.ndarray
        Raw sensor data; multiplied by the configured scaling factor.
    timestamps : np.ndarray
        Timestamps for the data
    metadata : dict, optional
        Metadata dictionary. When it has a ``"sensor_units"`` mapping, an
        entry for this sensor type overrides the default unit.

    Returns
    -------
    TimeSeries
        Configured TimeSeries object for the sensor type
    """
    # ECU streams arrive with an "ecu_" prefix in their name, but
    # SENSOR_TYPE_CONFIG and the metadata "sensor_units" section are keyed by
    # the bare sensor type. Without stripping the prefix the lookup always
    # fell through to the defaults and user-specified units were ignored.
    config_key = sensor_type.removeprefix("ecu_")

    config = SENSOR_TYPE_CONFIG.get(config_key)
    if config is None:
        config = {
            "scaling_factor": 1.0,
            "unit": "unspecified",
            "description": f"{sensor_type} data",
        }

    # Apply scaling factor
    scaled_data = data * config["scaling_factor"]

    # Create description with channel names
    description = f"{config['description']}: {', '.join(channel_names)}"

    # Use custom units from metadata if available. ``or {}`` tolerates both a
    # missing metadata dict and an empty ``sensor_units:`` key (parsed as None).
    custom_units = (metadata or {}).get("sensor_units") or {}
    # An exact match on the full name wins, then the bare sensor type, then
    # the configured default.
    unit = custom_units.get(
        sensor_type, custom_units.get(config_key, config["unit"])
    )

    return TimeSeries(
        name=sensor_type,
        description=description,
        data=scaled_data,
        unit=unit,
        timestamps=timestamps,
    )
127+
16128

17129
def _add_sensor_timeseries_by_type(
    nwbfile: NWBFile,
    channel_ids: list[str],
    data: np.ndarray,
    timestamps: np.ndarray,
    metadata: dict = None,
    name_prefix: str = "",
) -> None:
    """Add one acquisition TimeSeries per sensor type found in ``channel_ids``.

    Column ``i`` of ``data`` is taken to belong to ``channel_ids[i]``.
    """
    # Map each channel id to its first column, matching ``list.index``.
    first_column = {}
    for column, channel_id in enumerate(channel_ids):
        first_column.setdefault(channel_id, column)

    for sensor_type, channel_names in _categorize_sensor_channels(channel_ids).items():
        columns = [first_column[name] for name in channel_names]
        timeseries = _create_sensor_timeseries(
            sensor_type=f"{name_prefix}{sensor_type}",
            channel_names=channel_names,
            data=data[:, columns],
            timestamps=timestamps,
            metadata=metadata,
        )
        # Add to acquisition
        nwbfile.add_acquisition(timeseries)


def add_analog_data(
    nwbfile: NWBFile,
    rec_file_path: list[str],
    timestamps: np.ndarray = None,
    behavior_only: bool = False,
    metadata: dict = None,
    **kwargs,
) -> None:
    """Adds analog streams to the nwb file as separate TimeSeries objects for each sensor type.

    ECU analog channels are added to ``nwbfile.acquisition`` under names
    prefixed with ``"ecu_"``; headstage sensor channels are added under the
    bare sensor type name.

    Parameters
    ----------
    nwbfile : NWBFile
        nwb file being assembled
    rec_file_path : list[str]
        ordered list of file paths to all recfiles with session's data
    timestamps : np.ndarray, optional
        timestamps for the data; forwarded to the data chunk iterator
    behavior_only : bool, optional
        forwarded to the data chunk iterator
    metadata : dict, optional
        metadata dictionary for custom units
    **kwargs
        accepted for call compatibility; not used here
    """
    # Get the ids of the analog channels from the first rec file header
    root = convert_rec_header.read_header(rec_file_path[0])
    hconf = root.find("HardwareConfiguration")
    ecu_conf = next(
        (conf for conf in hconf if conf.attrib["name"] == "ECU"), None
    )

    # A header without an ECU device has no ECU analog channels. Iterating
    # ``None`` used to raise TypeError here. ``is not None`` is used because
    # an Element without children is falsy.
    ecu_analog_channel_ids = [
        channel.attrib["id"]
        for channel in (ecu_conf if ecu_conf is not None else ())
        if channel.attrib["dataType"] == "analog"
    ]

    # Make the data chunk iterator for ECU analog data
    rec_dci = RecFileDataChunkIterator(
        rec_file_path,
        nwb_hw_channel_order=ecu_analog_channel_ids,
        stream_id="ECU_analog",
        is_analog=True,
        timestamps=timestamps,
        behavior_only=behavior_only,
    )

    # Get headstage sensor channel IDs from multiplexed channels
    headstage_channel_ids = list(rec_dci.neo_io[0].multiplexed_channel_xml.keys())

    # Process ECU analog channels if any exist
    if ecu_analog_channel_ids:
        # Get ECU analog data (without headstage data).
        # NOTE(review): this appears to load every sample into memory rather
        # than streaming through the iterator - confirm this is acceptable
        # for long recordings.
        ecu_data = rec_dci._get_data(
            (slice(None), slice(0, len(ecu_analog_channel_ids)))
        )
        _add_sensor_timeseries_by_type(
            nwbfile,
            ecu_analog_channel_ids,
            ecu_data,
            rec_dci.timestamps,
            metadata=metadata,
            name_prefix="ecu_",
        )

    # Process headstage sensor channels if any exist
    if headstage_channel_ids:
        # NOTE(review): only the first rec file's reader (neo_io[0]) is
        # queried while the timestamps come from the whole iterator - verify
        # the lengths agree when a session spans several rec files.
        headstage_data = rec_dci.neo_io[0].get_analogsignal_multiplexed(
            headstage_channel_ids
        )
        _add_sensor_timeseries_by_type(
            nwbfile,
            headstage_channel_ids,
            headstage_data,
            rec_dci.timestamps,
            metadata=metadata,
        )
92228

93229

94230
def __merge_row_description(row_ids: list[str]) -> str:

src/trodes_to_nwb/convert_dios.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,20 @@ def _get_channel_name_map(metadata: dict) -> dict[str, str]:
2929
raise ValueError(
3030
f"Duplicate channel name {dio_event['description']} in metadata YAML"
3131
)
32+
33+
# Get unit for this specific event type, or use default
34+
unit = "unspecified" # Default unit for digital events
35+
if "unit" in dio_event:
36+
unit = dio_event["unit"]
37+
elif "units" in metadata and "behavioral_events" in metadata["units"]:
38+
unit = metadata["units"]["behavioral_events"]
39+
3240
channel_name_map[dio_event["description"]] = {
3341
"name": dio_event["name"],
3442
"comments": (
3543
dio_event["comments"] if "comments" in dio_event else "no comments"
3644
),
45+
"unit": unit
3746
}
3847
return channel_name_map
3948

@@ -101,7 +110,7 @@ def add_dios(nwbfile: NWBFile, recfile: list[str], metadata: dict) -> None:
101110
comments=channel_name_map[channel_name]["comments"],
102111
description=channel_name,
103112
data=state_changes,
104-
unit="-1", # TODO change to "N/A",
113+
unit=channel_name_map[channel_name]["unit"],
105114
timestamps=timestamps, # TODO adjust timestamps
106115
)
107116
beh_events.add_timeseries(ts)

0 commit comments

Comments
 (0)