Commit f48ef58

Move EventsParser class to the plugins subpackage
1 parent: 237cd8e

3 files changed (+169, -158 lines)

bidscoin/bids.py

Lines changed: 1 addition & 157 deletions
@@ -23,13 +23,13 @@
 from pathlib import Path
 from typing import List, Set, Tuple, Union, Dict, Any, Iterable, NewType
 from pydicom import dcmread, fileset, config
-from abc import ABC, abstractmethod
 from importlib.util import find_spec
 if find_spec('bidscoin') is None:
     import sys
     sys.path.append(str(Path(__file__).parents[1]))
 from bidscoin import bcoin, schemafolder, templatefolder, lsdirs, is_hidden, __version__, DEBUG
 from bidscoin.utilities import dicomsort
+from bidscoin.plugins import EventsParser
 from ruamel.yaml import YAML
 yaml = YAML()
 yaml.representer.ignore_aliases = lambda *data: True  # Expand aliases (https://stackoverflow.com/questions/58091449/disabling-alias-for-yaml-file-in-python)

@@ -59,162 +59,6 @@
 """The possible extensions of BIDS data files"""


-class EventsParser(ABC):
-    """Parser for stimulus presentation logfiles"""
-
-    def __init__(self, sourcefile: Path, eventsdata: dict, options: dict):
-        """
-        Reads the events table from the events logfile
-
-        :param sourcefile: The full filepath of the raw logfile
-        :param eventsdata: The run['events'] data (from a bidsmap)
-        :param options:    The plugin options
-        """
-
-        self.sourcefile = sourcefile
-        self._data = eventsdata
-        self.options = options
-
-    def __repr__(self):
-
-        return (f"{self.__class__}\n"
-                f"Path:\t\t{self.sourcefile}\n"
-                f"Time.cols:\t{self.time.get('cols')}\n"
-                f"Time.unit:\t{self.time.get('unit')}\n"
-                f"Time.start:\t{self.time.get('start')}\n"
-                f"Columns:\t{self.columns}\n"
-                f"Rows:\t{self.rows}")
-
-    def __str__(self):
-
-        return f"{self.sourcefile}"
-
-    @property
-    @abstractmethod
-    def logtable(self) -> pd.DataFrame:
-        """Returns the source logging data"""
-
-    @property
-    def eventstable(self) -> pd.DataFrame:
-        """Returns the target events.tsv data"""
-
-        # Check the parser's data structure
-        if not self.isvalid:
-            return pd.DataFrame()
-
-        df = copy.deepcopy(self.logtable)
-
-        # Convert the timing values to seconds (with maximally 4 digits after the decimal point)
-        df[self.time['cols']] = (df[self.time['cols']].apply(pd.to_numeric, errors='coerce') / self.time['unit']).round(4)
-
-        # Take the logtable columns of interest and from now on use the BIDS column names
-        df = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol]]
-        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol]
-
-        # Set the clock at zero at the start of the experiment
-        if self.time.get('start'):
-            start = pd.Series([True] * len(df))
-            for column, value in self.time['start'].items():
-                start &= (self.logtable[column].astype(str) == str(value)).values
-            if start.any():
-                LOGGER.bcdebug(f"Resetting clock offset: {df['onset'][start.values].iloc[0]}")
-                df['onset'] -= df['onset'][start.values].iloc[0]  # Take the time of the first occurrence as zero
-
-        # Loop over the row groups to filter/edit the rows
-        rows = pd.Series([len(self.rows) == 0] * len(df)).astype(bool)  # Boolean series with True values if no row expressions were specified
-        for group in self.rows:
-
-            for column, regex in group['include'].items():
-
-                # Get the rows that match the expression, i.e. make them True
-                rowgroup = self.logtable[column].astype(str).str.fullmatch(str(regex))
-
-                # Add the matching rows to the grand rows group
-                rows |= rowgroup.values
-
-                # Write the value(s) of the matching rows
-                for colname, values in (group.get('cast') or {}).items():
-                    df.loc[rowgroup, colname] = values
-
-        return df.loc[rows.values].sort_values(by='onset')
-
-    @property
-    def columns(self) -> List[dict]:
-        """List with mappings for the column names of the eventstable"""
-        return self._data.get('columns') or []
-
-    @columns.setter
-    def columns(self, value: List[dict]):
-        self._data['columns'] = value
-
-    @property
-    def rows(self) -> List[dict]:
-        """List with fullmatch regular expression dictionaries that yield row sets in the eventstable"""
-        return self._data.get('rows') or []
-
-    @rows.setter
-    def rows(self, value: List[dict]):
-        self._data['rows'] = value
-
-    @property
-    def time(self) -> dict:
-        """A dictionary with 'start', 'cols' and 'unit' values"""
-        return self._data.get('time') or {}
-
-    @time.setter
-    def time(self, value: dict):
-        self._data['time'] = value
-
-    @property
-    def isvalid(self) -> bool:
-        """Check the EventsParser data structure"""
-
-        def is_float(s):
-            try:
-                float(s)
-                return True
-            except (ValueError, TypeError):
-                return False
-
-        if not (valid := len(self.columns) >= 2):
-            LOGGER.warning(f"Events table must have at least two columns, got {len(self.columns)} instead\n{self}")
-            return False
-
-        if (key := [*self.columns[0].keys()][0]) != 'onset':
-            LOGGER.warning(f"First events column must be named 'onset', got '{key}' instead\n{self}")
-            valid = False
-
-        if (key := [*self.columns[1].keys()][0]) != 'duration':
-            LOGGER.warning(f"Second events column must be named 'duration', got '{key}' instead\n{self}")
-            valid = False
-
-        if len(self.time.get('cols',[])) < 2:
-            LOGGER.warning(f"Events table must have at least two timecol items, got {len(self.time.get('cols',[]))} instead\n{self}")
-            return False
-
-        elif not is_float(self.time.get('unit')):
-            LOGGER.warning(f"Time conversion factor must be a float, got '{self.time.get('unit')}' instead\n{self}")
-            valid = False
-
-        # Check if the logtable has existing and unique column names
-        columns = self.logtable.columns
-        for name in set([name for item in self.columns for name in item.values()] + [name for item in self.rows for name in item['include'].keys()] +
-                        [*self.time.get('start',{}).keys()] + self.time.get('cols',[])):
-            if name and name not in columns:
-                LOGGER.warning(f"Column '{name}' not found in the event table of {self}")
-                valid = False
-        if columns.duplicated().any():
-            LOGGER.warning(f"Duplicate columns found: {columns}\n{self}")
-            valid = False
-
-        return valid
-
-    def write(self, targetfile: Path):
-        """Write the eventstable to a BIDS events.tsv file"""
-
-        self.eventstable.to_csv(targetfile, sep='\t', index=False)
-
-
 class DataSource:
     """Reads properties, attributes and BIDS-related features to sourcefiles of a supported dataformat (e.g. DICOM or PAR)"""
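Note that bids.py itself now imports the class from its new home (the "from bidscoin.plugins import EventsParser" line above), so the name also stays reachable under the old module path. A quick sanity check, assuming a bidscoin installation with this commit applied:

    from bidscoin.plugins import EventsParser   # The new canonical location
    from bidscoin.bids import EventsParser      # Still resolves, via the re-import shown above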

bidscoin/plugins/__init__.py

Lines changed: 166 additions & 0 deletions
@@ -1 +1,167 @@
 """Pre-installed plugins"""
+
+import logging
+import copy
+import pandas as pd
+from pathlib import Path
+from abc import ABC, abstractmethod
+from typing import List
+
+LOGGER = logging.getLogger(__name__)
+
+
+class EventsParser(ABC):
+    """Parser for stimulus presentation logfiles"""
+
+    def __init__(self, sourcefile: Path, eventsdata: dict, options: dict):
+        """
+        Reads the events table from the events logfile
+
+        :param sourcefile: The full filepath of the raw logfile
+        :param eventsdata: The run['events'] data (from a bidsmap)
+        :param options:    The plugin options
+        """
+
+        self.sourcefile = sourcefile
+        self._data = eventsdata
+        self.options = options
+
+    def __repr__(self):
+
+        return (f"{self.__class__}\n"
+                f"Path:\t\t{self.sourcefile}\n"
+                f"Time.cols:\t{self.time.get('cols')}\n"
+                f"Time.unit:\t{self.time.get('unit')}\n"
+                f"Time.start:\t{self.time.get('start')}\n"
+                f"Columns:\t{self.columns}\n"
+                f"Rows:\t{self.rows}")
+
+    def __str__(self):
+
+        return f"{self.sourcefile}"
+
+    @property
+    @abstractmethod
+    def logtable(self) -> pd.DataFrame:
+        """Returns the source logging data"""
+
+    @property
+    def eventstable(self) -> pd.DataFrame:
+        """Returns the target events.tsv data"""
+
+        # Check the parser's data structure
+        if not self.isvalid:
+            return pd.DataFrame()
+
+        df = copy.deepcopy(self.logtable)
+
+        # Convert the timing values to seconds (with maximally 4 digits after the decimal point)
+        df[self.time['cols']] = (df[self.time['cols']].apply(pd.to_numeric, errors='coerce') / self.time['unit']).round(4)
+
+        # Take the logtable columns of interest and from now on use the BIDS column names
+        df = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol]]
+        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol]
+
+        # Set the clock at zero at the start of the experiment
+        if self.time.get('start'):
+            start = pd.Series([True] * len(df))
+            for column, value in self.time['start'].items():
+                start &= (self.logtable[column].astype(str) == str(value)).values
+            if start.any():
+                LOGGER.bcdebug(f"Resetting clock offset: {df['onset'][start.values].iloc[0]}")
+                df['onset'] -= df['onset'][start.values].iloc[0]  # Take the time of the first occurrence as zero
+
+        # Loop over the row groups to filter/edit the rows
+        rows = pd.Series([len(self.rows) == 0] * len(df)).astype(bool)  # Boolean series with True values if no row expressions were specified
+        for group in self.rows:
+
+            for column, regex in group['include'].items():
+
+                # Get the rows that match the expression, i.e. make them True
+                rowgroup = self.logtable[column].astype(str).str.fullmatch(str(regex))
+
+                # Add the matching rows to the grand rows group
+                rows |= rowgroup.values
+
+                # Write the value(s) of the matching rows
+                for colname, values in (group.get('cast') or {}).items():
+                    df.loc[rowgroup, colname] = values
+
+        return df.loc[rows.values].sort_values(by='onset')
+
+    @property
+    def columns(self) -> List[dict]:
+        """List with mappings for the column names of the eventstable"""
+        return self._data.get('columns') or []
+
+    @columns.setter
+    def columns(self, value: List[dict]):
+        self._data['columns'] = value
+
+    @property
+    def rows(self) -> List[dict]:
+        """List with fullmatch regular expression dictionaries that yield row sets in the eventstable"""
+        return self._data.get('rows') or []
+
+    @rows.setter
+    def rows(self, value: List[dict]):
+        self._data['rows'] = value
+
+    @property
+    def time(self) -> dict:
+        """A dictionary with 'start', 'cols' and 'unit' values"""
+        return self._data.get('time') or {}
+
+    @time.setter
+    def time(self, value: dict):
+        self._data['time'] = value
+
+    @property
+    def isvalid(self) -> bool:
+        """Check the EventsParser data structure"""
+
+        def is_float(s):
+            try:
+                float(s)
+                return True
+            except (ValueError, TypeError):
+                return False
+
+        if not (valid := len(self.columns) >= 2):
+            LOGGER.warning(f"Events table must have at least two columns, got {len(self.columns)} instead\n{self}")
+            return False
+
+        if (key := [*self.columns[0].keys()][0]) != 'onset':
+            LOGGER.warning(f"First events column must be named 'onset', got '{key}' instead\n{self}")
+            valid = False
+
+        if (key := [*self.columns[1].keys()][0]) != 'duration':
+            LOGGER.warning(f"Second events column must be named 'duration', got '{key}' instead\n{self}")
+            valid = False
+
+        if len(self.time.get('cols',[])) < 2:
+            LOGGER.warning(f"Events table must have at least two timecol items, got {len(self.time.get('cols',[]))} instead\n{self}")
+            return False
+
+        elif not is_float(self.time.get('unit')):
+            LOGGER.warning(f"Time conversion factor must be a float, got '{self.time.get('unit')}' instead\n{self}")
+            valid = False
+
+        # Check if the logtable has existing and unique column names
+        columns = self.logtable.columns
+        for name in set([name for item in self.columns for name in item.values()] + [name for item in self.rows for name in item['include'].keys()] +
+                        [*self.time.get('start',{}).keys()] + self.time.get('cols',[])):
+            if name and name not in columns:
+                LOGGER.warning(f"Column '{name}' not found in the event table of {self}")
+                valid = False
+        if columns.duplicated().any():
+            LOGGER.warning(f"Duplicate columns found: {columns}\n{self}")
+            valid = False
+
+        return valid
+
+    def write(self, targetfile: Path):
+        """Write the eventstable to a BIDS events.tsv file"""
+
+        self.eventstable.to_csv(targetfile, sep='\t', index=False)
+
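For orientation, here is a minimal sketch of how a plugin would subclass the relocated base class. It is illustrative only and not part of this commit: the TsvEventsParser name, the logfile columns and the eventsdata mapping are all hypothetical, chosen to show how the abstract logtable property and the bidsmap's columns/rows/time data drive eventstable.

    import pandas as pd
    from pathlib import Path
    from bidscoin.plugins import EventsParser

    class TsvEventsParser(EventsParser):
        """Hypothetical parser for tab-separated stimulus logfiles"""

        @property
        def logtable(self) -> pd.DataFrame:
            """Returns the source logging data"""
            return pd.read_csv(self.sourcefile, sep='\t')

    # A made-up run['events'] mapping, as it could appear in a bidsmap. Timing
    # columns are divided by time['unit'], so a unit of 1000 converts ms to seconds
    eventsdata = {'columns': [{'onset':      'onset_ms'},            # BIDS column <- source column
                              {'duration':   'duration_ms'},
                              {'trial_type': 'condition'}],
                  'rows':    [{'include': {'condition': 'go|stop'}}],  # Keep only go/stop trials
                  'time':    {'cols': ['onset_ms', 'duration_ms'], 'unit': 1000, 'start': {'condition': 'pulse'}}}

    parser = TsvEventsParser(Path('sub-01_task.log'), eventsdata, options={})
    parser.write(Path('sub-01_task-go_events.tsv'))    # Writes the events.tsv output

With this mapping, eventstable converts the two timing columns from milliseconds to seconds, renames them to onset and duration, zeroes the clock at the first row whose condition equals 'pulse', and keeps only the rows whose condition fully matches go|stop.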

bidscoin/plugins/events2bids.py

Lines changed: 2 additions & 1 deletion
@@ -7,7 +7,8 @@
 from bids_validator import BIDSValidator
 from pathlib import Path
 from bidscoin import bids
-from bidscoin.bids import BidsMap, DataFormat, EventsParser, is_hidden, Plugin
+from bidscoin.plugins import EventsParser
+from bidscoin.bids import BidsMap, DataFormat, is_hidden, Plugin
 # from convert_eprime.utils import remove_unicode

 LOGGER = logging.getLogger(__name__)
