|
23 | 23 | from pathlib import Path |
24 | 24 | from typing import List, Set, Tuple, Union, Dict, Any, Iterable, NewType |
25 | 25 | from pydicom import dcmread, fileset, config |
26 | | -from abc import ABC, abstractmethod |
27 | 26 | from importlib.util import find_spec |
28 | 27 | if find_spec('bidscoin') is None: |
29 | 28 | import sys |
30 | 29 | sys.path.append(str(Path(__file__).parents[1])) |
31 | 30 | from bidscoin import bcoin, schemafolder, templatefolder, lsdirs, is_hidden, __version__, DEBUG |
32 | 31 | from bidscoin.utilities import dicomsort |
| 32 | +from bidscoin.plugins import EventsParser |
33 | 33 | from ruamel.yaml import YAML |
34 | 34 | yaml = YAML() |
35 | 35 | yaml.representer.ignore_aliases = lambda *data: True # Expand aliases (https://stackoverflow.com/questions/58091449/disabling-alias-for-yaml-file-in-python) |
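As an aside, the ignore_aliases override above keeps ruamel.yaml from emitting anchors and aliases (&id001/*id001) for objects that occur more than once, so repeated mappings are written out in full. A minimal standalone sketch of the effect (not part of this diff):

    from io import StringIO
    from ruamel.yaml import YAML

    yaml = YAML()
    yaml.representer.ignore_aliases = lambda *data: True    # never alias, always expand

    shared = {'suffix': 'bold'}                             # one dict object, referenced twice
    stream = StringIO()
    yaml.dump({'run1': shared, 'run2': shared}, stream)
    print(stream.getvalue())                                # 'run2' repeats the mapping instead of '*id001'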
|
59 | 59 | """The possible extensions of BIDS data files""" |
60 | 60 |
61 | 61 |
62 | | -class EventsParser(ABC): |
63 | | - """Parser for stimulus presentation logfiles""" |
64 | | - |
65 | | - def __init__(self, sourcefile: Path, eventsdata: dict, options: dict): |
66 | | - """ |
67 | | - Reads the events table from the events logfile |
68 | | - |
69 | | - :param sourcefile: The full filepath of the raw logfile |
70 | | - :param eventsdata: The run['events'] data (from a bidsmap) |
71 | | - :param options: The plugin options |
72 | | - """ |
73 | | - |
74 | | - self.sourcefile = sourcefile |
75 | | - self._data = eventsdata |
76 | | - self.options = options |
77 | | - |
78 | | - def __repr__(self): |
79 | | - |
80 | | - return (f"{self.__class__}\n" |
81 | | - f"Path:\t\t{self.sourcefile}\n" |
82 | | - f"Time.cols:\t{self.time.get('cols')}\n" |
83 | | - f"Time.unit:\t{self.time.get('unit')}\n" |
84 | | - f"Time.start:\t{self.time.get('start')}\n" |
85 | | - f"Columns:\t{self.columns}\n" |
86 | | - f"Rows:\t{self.rows}") |
87 | | - |
88 | | - def __str__(self): |
89 | | - |
90 | | - return f"{self.sourcefile}" |
91 | | - |
92 | | - @property |
93 | | - @abstractmethod |
94 | | - def logtable(self) -> pd.DataFrame: |
95 | | - """Returns the source logging data""" |
96 | | - |
97 | | - @property |
98 | | - def eventstable(self) -> pd.DataFrame: |
99 | | - """Returns the target events.tsv data""" |
100 | | - |
101 | | - # Check the parser's data structure |
102 | | - if not self.isvalid: |
103 | | - return pd.DataFrame() |
104 | | - |
105 | | - df = copy.deepcopy(self.logtable) |
106 | | - |
107 | | - # Convert the timing values to seconds (with maximally 4 digits after the decimal point) |
108 | | - df[self.time['cols']] = (df[self.time['cols']].apply(pd.to_numeric, errors='coerce') / self.time['unit']).round(4) |
109 | | - |
110 | | - # Take the logtable columns of interest and from now on use the BIDS column names |
111 | | - df = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol]] |
112 | | - df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol] |
113 | | - |
114 | | - # Set the clock at zero at the start of the experiment |
115 | | - if self.time.get('start'): |
116 | | - start = pd.Series([True] * len(df)) |
117 | | - for column, value in self.time['start'].items(): |
118 | | - start &= (self.logtable[column].astype(str) == str(value)).values |
119 | | - if start.any(): |
120 | | - LOGGER.bcdebug(f"Resetting clock offset: {df['onset'][start.values].iloc[0]}") |
121 | | - df['onset'] -= df['onset'][start.values].iloc[0] # Take the time of the first occurrence as zero |
122 | | - |
123 | | - # Loop over the row groups to filter/edit the rows |
124 | | - rows = pd.Series([len(self.rows) == 0] * len(df)).astype(bool) # Boolean series with True values if no row expressions were specified |
125 | | - for group in self.rows: |
126 | | - |
127 | | - for column, regex in group['include'].items(): |
128 | | - |
129 | | - # Get the rows that match the expression, i.e. make them True |
130 | | - rowgroup = self.logtable[column].astype(str).str.fullmatch(str(regex)) |
131 | | - |
132 | | - # Add the matching rows to the grand rows group |
133 | | - rows |= rowgroup.values |
134 | | - |
135 | | - # Write the value(s) of the matching rows |
136 | | - for colname, values in (group.get('cast') or {}).items(): |
137 | | - df.loc[rowgroup, colname] = values |
138 | | - |
139 | | - return df.loc[rows.values].sort_values(by='onset') |
140 | | - |
141 | | - @property |
142 | | - def columns(self) -> List[dict]: |
143 | | - """List with mappings for the column names of the eventstable""" |
144 | | - return self._data.get('columns') or [] |
145 | | - |
146 | | - @columns.setter |
147 | | - def columns(self, value: List[dict]): |
148 | | - self._data['columns'] = value |
149 | | - |
150 | | - @property |
151 | | - def rows(self) -> List[dict]: |
152 | | - """List with fullmatch regular expression dictionaries that yield row sets in the eventstable""" |
153 | | - return self._data.get('rows') or [] |
154 | | - |
155 | | - @rows.setter |
156 | | - def rows(self, value: List[dict]): |
157 | | - self._data['rows'] = value |
158 | | - |
159 | | - @property |
160 | | - def time(self) -> dict: |
161 | | - """A dictionary with 'start', 'cols' and 'unit' values""" |
162 | | - return self._data.get('time') or {} |
163 | | - |
164 | | - @time.setter |
165 | | - def time(self, value: dict): |
166 | | - self._data['time'] = value |
167 | | - |
168 | | - @property |
169 | | - def isvalid(self) -> bool: |
170 | | - """Check the EventsParser data structure""" |
171 | | - |
172 | | - def is_float(s): |
173 | | - try: |
174 | | - float(s) |
175 | | - return True |
176 | | - except (ValueError, TypeError): |
177 | | - return False |
178 | | - |
179 | | - if not (valid := len(self.columns) >= 2): |
180 | | - LOGGER.warning(f"Events table must have at least two columns, got {len(self.columns)} instead\n{self}") |
181 | | - return False |
182 | | - |
183 | | - if (key := [*self.columns[0].keys()][0]) != 'onset': |
184 | | - LOGGER.warning(f"First events column must be named 'onset', got '{key}' instead\n{self}") |
185 | | - valid = False |
186 | | - |
187 | | - if (key := [*self.columns[1].keys()][0]) != 'duration': |
188 | | - LOGGER.warning(f"Second events column must be named 'duration', got '{key}' instead\n{self}") |
189 | | - valid = False |
190 | | - |
191 | | - if len(self.time.get('cols',[])) < 2: |
192 | | - LOGGER.warning(f"Events table must have at least two timecol items, got {len(self.time.get('cols',[]))} instead\n{self}") |
193 | | - return False |
194 | | - |
195 | | - elif not is_float(self.time.get('unit')): |
196 | | - LOGGER.warning(f"Time conversion factor must be a float, got '{self.time.get('unit')}' instead\n{self}") |
197 | | - valid = False |
198 | | - |
199 | | - # Check if the logtable has existing and unique column names |
200 | | - columns = self.logtable.columns |
201 | | - for name in set([name for item in self.columns for name in item.values()] + [name for item in self.rows for name in item['include'].keys()] + |
202 | | - [*self.time.get('start',{}).keys()] + self.time.get('cols',[])): |
203 | | - if name and name not in columns: |
204 | | - LOGGER.warning(f"Column '{name}' not found in the event table of {self}") |
205 | | - valid = False |
206 | | - if columns.duplicated().any(): |
207 | | - LOGGER.warning(f"Duplicate columns found: {columns}\n{self}") |
208 | | - valid = False |
209 | | - |
210 | | - return valid |
211 | | - |
212 | | - def write(self, targetfile: Path): |
213 | | - """Write the eventstable to a BIDS events.tsv file""" |
214 | | - |
215 | | - self.eventstable.to_csv(targetfile, sep='\t', index=False) |
216 | | - |
217 | | - |
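For context: this commit moves the EventsParser ABC out of bids.py and into bidscoin.plugins (hence the new import above). A minimal sketch of a plugin parser built on the relocated class, assuming the interface stays exactly as removed here; the TsvLogParser class, the logfile and its column names are hypothetical:

    from pathlib import Path
    import pandas as pd
    from bidscoin.plugins import EventsParser

    class TsvLogParser(EventsParser):
        """Hypothetical parser for a tab-separated stimulus presentation logfile"""

        @property
        def logtable(self) -> pd.DataFrame:
            """Returns the source logging data"""
            return pd.read_csv(self.sourcefile, sep='\t')

    # Bidsmap-style run['events'] data: map source columns onto 'onset' and 'duration',
    # keep only the stimulus rows, and convert the millisecond timestamps to seconds
    eventsdata = {'columns': [{'onset': 'onset_time'}, {'duration': 'trial_duration'}],
                  'rows':    [{'include': {'event_type': 'stimulus'}}],
                  'time':    {'cols': ['onset_time', 'trial_duration'], 'unit': 1000, 'start': {}}}

    parser = TsvLogParser(Path('sub-01_task-rest.log'), eventsdata, options={})
    if parser.isvalid:                                      # checks the structure against logtable
        parser.write(Path('sub-01_task-rest_events.tsv'))   # eventstable -> BIDS events.tsv

A subclass only needs to supply logtable; eventstable, isvalid and write all come from the base class.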
218 | 62 | class DataSource: |
219 | 63 | """Reads properties, attributes and BIDS-related features to sourcefiles of a supported dataformat (e.g. DICOM or PAR)""" |
220 | 64 |
|
|