Skip to content

Commit e0562d4

Browse files
authored
Merge pull request #4 from maestroque/bids-support
Add Physio object generation from BIDS
2 parents 0581dc2 + ffa8e71 commit e0562d4

File tree

6 files changed

+312
-7
lines changed

6 files changed

+312
-7
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,7 @@ dmypy.json
125125
.pyre/
126126

127127
.vscode/
128+
129+
# Test Data
130+
physutils/tests/data/bids-dir
131+
tmp.*

physutils/io.py

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,143 @@
55

66
import importlib
77
import json
8+
import os
89
import os.path as op
910

1011
import numpy as np
12+
from bids import BIDSLayout
1113
from loguru import logger
1214

1315
from physutils import physio
1416

1517
EXPECTED = ["data", "fs", "history", "metadata"]
1618

1719

20+
def _guess_physio_type(column):
    """Return "cardiac", "respiratory", "trigger", "time" or None for a column name."""
    name = column.lower()
    if any(key in name for key in ("cardiac", "ppg", "ecg", "card", "pulse")):
        return "cardiac"
    if any(
        key in name
        for key in ("respiratory", "rsp", "resp", "breath", "co2", "o2")
    ):
        return "respiratory"
    # NOTE(review): plain substring matching, so "tr" also matches unrelated
    # names that merely contain those letters (e.g. "electrodermal").
    if any(key in name for key in ("trigger", "tr")):
        return "trigger"
    if "time" in name:
        return "time"
    return None


def load_from_bids(
    bids_path,
    subject,
    session=None,
    task=None,
    run=None,
    recording=None,
    extension="tsv.gz",
    suffix="physio",
):
    """
    Load physiological data from BIDS-formatted directory

    Parameters
    ----------
    bids_path : str
        Path to BIDS-formatted directory
    subject : str
        Subject identifier
    session : str, optional
        Session identifier. Default: None
    task : str, optional
        Task identifier. Default: None
    run : str, optional
        Run identifier. Default: None
    recording : str, optional
        Recording identifier. Default: None
    extension : str, optional
        Extension of file to load. Default: "tsv.gz"
    suffix : str, optional
        Suffix of file to load. Default: "physio"

    Returns
    -------
    physio_objects : dict
        Dictionary mapping each recognised column name to a
        :class:`physutils.Physio` object holding that column's samples.
        Time columns and columns whose type cannot be determined are not
        included.

    Raises
    ------
    FileNotFoundError
        If `bids_path` does not exist or no file matches the query
    ValueError
        If more than one file matches the query
    """

    # check if file exists and is in BIDS format
    if not op.exists(bids_path):
        raise FileNotFoundError(f"Provided path {bids_path} does not exist")

    layout = BIDSLayout(bids_path, validate=False)
    bids_file = layout.get(
        subject=subject,
        session=session,
        task=task,
        run=run,
        suffix=suffix,
        extension=extension,
        recording=recording,
    )
    logger.debug(f"BIDS file found: {bids_file}")
    if len(bids_file) == 0:
        raise FileNotFoundError(
            f"No files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )
    if len(bids_file) > 1:
        raise ValueError(
            f"Multiple files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
        )

    matched_file = bids_file[0]
    config_file = matched_file.get_metadata()
    fs = config_file["SamplingFrequency"]
    t_start = config_file.get("StartTime", 0)
    columns = config_file["Columns"]
    logger.debug(f"Loaded structure contains columns: {columns}")

    # ndmin=2 keeps the (rows, columns) layout even when the file holds a
    # single column or a single row, so the column slicing below is valid
    data = np.loadtxt(matched_file.path, ndmin=2)

    if "time" in columns:
        reached_start = data[:, columns.index("time")] >= t_start
        if not reached_start.any():
            # argmax over an all-False mask is 0, i.e. nothing is trimmed
            logger.warning(
                "No sample at or after the start time was found. Loading data from the beginning of the file"
            )
        idx_0 = int(np.argmax(reached_start))
    else:
        idx_0 = 0
        logger.warning(
            "No time column found in file. Assuming data starts at the beginning of the file"
        )

    label = matched_file.filename.split(".")[0].replace("_physio", "")

    physio_objects = {}
    for col_idx, col in enumerate(columns):
        col_physio_type = _guess_physio_type(col)
        if col_physio_type == "time":
            continue
        if col_physio_type is None:
            logger.warning(
                f"Column {col}'s type cannot be determined. Additional features may be missing."
            )
            continue

        if col_physio_type == "trigger":
            # TODO: Implement trigger loading using the MRI data object
            logger.warning("MRI trigger characteristics extraction not yet implemented")

        # `_get_call` must be invoked from this function so that the recorded
        # history entry refers to `load_from_bids` and its arguments
        physio_objects[col] = physio.Physio(
            data[idx_0:, col_idx],
            fs=fs,
            history=[physio._get_call(exclude=[])],
        )

        # trigger columns are loaded as plain signals without type or label
        if col_physio_type in ("cardiac", "respiratory"):
            physio_objects[col]._physio_type = col_physio_type
            physio_objects[col]._label = label

    return physio_objects
136+
137+
18138
def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):
19139
"""
20140
Returns `Physio` object with provided data
21141
22142
Parameters
23143
----------
24-
data : str or array_like or Physio_like
144+
data : str, os.path.PathLike or array_like or Physio_like
25145
Input physiological data. If array_like, should be one-dimensional
26146
fs : float, optional
27147
Sampling rate of `data`. Default: None
@@ -46,7 +166,7 @@ def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):
46166

47167
# first check if the file was made with `save_physio`; otherwise, try to
48168
# load it as a plain text file and instantiate a history
49-
if isinstance(data, str):
169+
if isinstance(data, str) or isinstance(data, os.PathLike):
50170
try:
51171
inp = dict(np.load(data, allow_pickle=allow_pickle))
52172
for attr in EXPECTED:

physutils/physio.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,12 @@ def new_physio_like(
220220

221221
if suppdata is None:
222222
suppdata = ref_physio._suppdata if copy_suppdata else None
223-
223+
224224
label = ref_physio.label if copy_label else None
225225
physio_type = ref_physio.physio_type if copy_physio_type else None
226-
computed_metrics = list(ref_physio.computed_metrics) if copy_computed_metrics else []
226+
computed_metrics = (
227+
dict(ref_physio.computed_metrics) if copy_computed_metrics else {}
228+
)
227229

228230
# make new class
229231
out = ref_physio.__class__(
@@ -340,7 +342,7 @@ def __init__(
340342
reject=np.empty(0, dtype=int),
341343
)
342344
self._suppdata = None if suppdata is None else np.asarray(suppdata).squeeze()
343-
self._computed_metrics = []
345+
self._computed_metrics = dict()
344346

345347
def __array__(self):
346348
return self.data
@@ -542,3 +544,49 @@ def neurokit2phys(
542544
metadata = dict(peaks=peaks)
543545

544546
return cls(data, fs=fs, metadata=metadata, **kwargs)
547+
548+
549+
class MRIConfig:
    """
    Class to hold MRI configuration information

    Parameters
    ----------
    slice_timings : 1D array_like, optional
        Slice timings in seconds. Default: None
    n_scans : int, optional
        Number of volumes in the MRI scan. Default: None
    tr : float, optional
        Repetition time in seconds. Default: None

    Raises
    ------
    ValueError
        If `slice_timings` has more than one dimension
    """

    def __init__(self, slice_timings=None, n_scans=None, tr=None):
        if np.ndim(slice_timings) > 1:
            raise ValueError("Slice timings must be a 1-dimensional array.")

        # Keep unspecified values as None: int(None) / float(None) would raise
        # and np.asarray(None) would silently become a 0-d object array
        self._slice_timings = (
            None if slice_timings is None else np.asarray(slice_timings)
        )
        self._n_scans = None if n_scans is None else int(n_scans)
        self._tr = None if tr is None else float(tr)
        logger.debug(f"Initializing new MRIConfig object: {self}")

    def __str__(self):
        return "{name}(n_scans={n_scans}, tr={tr})".format(
            name=self.__class__.__name__,
            n_scans=self._n_scans,
            tr=self._tr,
        )

    @property
    def slice_timings(self):
        """Slice timings in seconds"""
        return self._slice_timings

    @property
    def n_scans(self):
        """Number of volumes in the MRI scan"""
        return self._n_scans

    @property
    def tr(self):
        """Repetition time in seconds"""
        return self._tr

physutils/tests/test_io.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
import pytest
88

99
from physutils import io, physio
10-
from physutils.tests.utils import filter_physio, get_test_data_path
10+
from physutils.tests.utils import (
11+
create_random_bids_structure,
12+
filter_physio,
13+
get_test_data_path,
14+
)
1115

1216

1317
def test_load_physio(caplog):
@@ -46,6 +50,43 @@ def test_load_physio(caplog):
4650
io.load_physio([1, 2, 3])
4751

4852

53+
def test_load_from_bids(tmp_path):
    """Load a generated BIDS dataset whose physio file carries a recording entity."""
    # Build the dataset in a per-test temporary directory so the test does not
    # depend on the working directory or on files left behind by other tests
    create_random_bids_structure(str(tmp_path), recording_id="cardiac")
    phys_array = io.load_from_bids(
        str(tmp_path / "bids-dir"),
        subject="01",
        session="01",
        task="rest",
        run="01",
        recording="cardiac",
    )

    # Every non-time column of the generated file should be loaded; checking
    # the keys also stops the loop below from passing on an empty result
    assert set(phys_array) == {
        "respiratory_chest",
        "trigger",
        "cardiac",
        "respiratory_CO2",
        "respiratory_O2",
    }

    for phys in phys_array.values():
        assert isinstance(phys, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert phys.data.size == 80000
        assert phys.fs == 10000.0
        assert phys.history[0][0] == "physutils.io.load_from_bids"
70+
71+
72+
def test_load_from_bids_no_rec(tmp_path):
    """Load a generated BIDS dataset whose physio file has no recording entity."""
    # Build the dataset in a per-test temporary directory so the test does not
    # depend on the working directory or on files left behind by other tests
    create_random_bids_structure(str(tmp_path))
    phys_array = io.load_from_bids(
        str(tmp_path / "bids-dir"),
        subject="01",
        session="01",
        task="rest",
        run="01",
    )

    # Every non-time column of the generated file should be loaded; checking
    # the keys also stops the loop below from passing on an empty result
    assert set(phys_array) == {
        "respiratory_chest",
        "trigger",
        "cardiac",
        "respiratory_CO2",
        "respiratory_O2",
    }

    for phys in phys_array.values():
        assert isinstance(phys, physio.Physio)
        # The data saved are the ones after t_0 = -3s
        assert phys.data.size == 80000
        assert phys.fs == 10000.0
        assert phys.history[0][0] == "physutils.io.load_from_bids"
88+
89+
4990
def test_save_physio(tmpdir):
5091
pckl = io.load_physio(get_test_data_path("ECG.phys"), allow_pickle=True)
5192
out = io.save_physio(tmpdir.join("tmp").purebasename, pckl)

physutils/tests/utils.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
Utilities for testing
33
"""
44

5+
import json
6+
from os import makedirs
57
from os.path import join as pjoin
68

79
import numpy as np
10+
import pandas as pd
811
from pkg_resources import resource_filename
912
from scipy import signal
1013

@@ -77,3 +80,91 @@ def filter_physio(data, cutoffs, method, *, order=3):
7780
filtered = physio.new_physio_like(data, signal.filtfilt(b, a, data))
7881

7982
return filtered
83+
84+
85+
def create_random_bids_structure(data_dir, recording_id=None, num_rows=100000):
    """
    Create a minimal BIDS dataset containing one random physio recording

    The dataset is written to ``<data_dir>/bids-dir`` for subject "01",
    session "01", task "rest" and run "01". It consists of a
    ``dataset_description.json``, a ``*_physio.json`` sidecar and a
    headerless, gzip-compressed ``*_physio.tsv.gz`` table whose first column
    is a time axis and whose remaining columns are uniform random values.

    Parameters
    ----------
    data_dir : str
        Directory in which the ``bids-dir`` dataset is created
    recording_id : str, optional
        If given, a ``recording-<recording_id>`` entity is added to the
        physio file names. Default: None
    num_rows : int, optional
        Number of samples written to the data file. Default: 100000

    Returns
    -------
    bids_dir : str
        Path to the ``func`` directory holding the generated physio files
    """

    dataset_description = {
        "Name": "Example BIDS Dataset",
        "BIDSVersion": "1.7.0",
        "License": "",
        "Authors": ["Author1", "Author2"],
        "Acknowledgements": "",
        "HowToAcknowledge": "",
        "Funding": "",
        "ReferencesAndLinks": "",
        "DatasetDOI": "",
    }

    physio_json = {
        "SamplingFrequency": 10000.0,
        "StartTime": -3,
        "Columns": [
            "time",
            "respiratory_chest",
            "trigger",
            "cardiac",
            "respiratory_CO2",
            "respiratory_O2",
        ],
    }

    # Create BIDS structure directory
    subject_id = "01"
    session_id = "01"
    task_id = "rest"
    run_id = "01"

    bids_root = pjoin(data_dir, "bids-dir")
    bids_dir = pjoin(bids_root, f"sub-{subject_id}", f"ses-{session_id}", "func")
    makedirs(bids_dir, exist_ok=True)

    # Create dataset_description.json
    with open(pjoin(bids_root, "dataset_description.json"), "w") as f:
        json.dump(dataset_description, f, indent=4)

    filename_body = (
        f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}"
    )
    if recording_id is not None:
        filename_body = f"{filename_body}_recording-{recording_id}"

    # Create physio.json
    with open(pjoin(bids_dir, f"{filename_body}_physio.json"), "w") as f:
        json.dump(physio_json, f, indent=4)

    # Initialize tsv file with random data columns and a time column.
    # The column count follows the sidecar so the two cannot drift apart.
    num_cols = len(physio_json["Columns"])
    # The time axis begins `time_offset` seconds before StartTime, so the
    # file contains samples that precede the declared start of the recording
    time_offset = 2
    time = (
        np.arange(num_rows) / physio_json["SamplingFrequency"]
        + physio_json["StartTime"]
        - time_offset
    )
    data = np.column_stack((time, np.random.rand(num_rows, num_cols - 1).round(8)))
    df = pd.DataFrame(data)

    # Compress dataframe into tsv.gz
    tsv_gz_file = pjoin(bids_dir, f"{filename_body}_physio.tsv.gz")

    df.to_csv(
        tsv_gz_file,
        sep="\t",
        index=False,
        header=False,
        float_format="%.8e",
        compression="gzip",
    )

    return bids_dir

0 commit comments

Comments
 (0)