
Commit 385792b

Expand/pivot PsychoPy lists-in-cells + various fixes
1 parent: 773da1f

6 files changed: +47 -40 lines

bidscoin/bidseditor.py

Lines changed: 3 additions & 2 deletions
@@ -1507,12 +1507,12 @@ def run2data(self) -> tuple:
 
         # Set up the data for the events table
         df = self.events.logtable
-        events_data['log_table'] = [[{'value': name, 'editable': False} for name in df.columns]] if len(df) else []
+        events_data['log_table'] = [[{'value': name, 'editable': False} for name in df]] if len(df) else []
         for i in range(len(df)):
             events_data['log_table'].append([{'value': value, 'editable': False} for value in df.iloc[i]])
 
         df = self.events.eventstable
-        events_data['table'] = [[{'value': name, 'editable': False} for name in df.columns]] if len(df) else []
+        events_data['table'] = [[{'value': name, 'editable': False} for name in df]] if len(df) else []
         for i in range(len(df)):
             events_data['table'].append([{'value': value, 'editable': False} for value in df.iloc[i]])
 
@@ -1915,6 +1915,7 @@ def reset(self, refresh: bool=False):
         self.fill_table(self.meta_table, meta_data)
         if events_data:
             self.fill_table(self.events_parsing, events_data['parsing'])
+            self.fill_table(self.log_table, events_data['log_table'])
             self.fill_table(self.events_time, events_data['time'])
             self.fill_table(self.events_rows, events_data['rows'])
             self.fill_table(self.events_columns, events_data['columns'])

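Aside: the recurring `.columns` removals throughout this commit all lean on the same pandas idiom: iterating a DataFrame yields its column labels, and `in` tests membership against those labels, so `for name in df` and `sourcecol in df` behave exactly like the `.columns` spellings they replace. A minimal standalone sketch (toy data, not from the repo):

    import pandas as pd

    df = pd.DataFrame({'onset': [0.0, 1.5], 'duration': [0.5, 0.5]})
    print([name for name in df])    # ['onset', 'duration'] -- same as list(df.columns)
    print('onset' in df)            # True -- membership is tested against column labels, not values
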
bidscoin/heuristics/bidsmap_dccn.yaml

Lines changed: 2 additions & 4 deletions
@@ -1161,13 +1161,14 @@ Psychopy:
     events: &psychopy_events
       parsing:                          # The settings to parse the source table from the log file
         table: [long-wide, pivot, 1]    # The raw source table or a pivoted 'onset', 'duration', 'event_type' version
+        expand: scannerPulse.rt         # Expands lists into columns for each array item
       columns:                          # Columns that are included in the output table, i.e. {output column: input column}
       - onset: onset                    # The mapping for the first required column 'onset'
       - duration: duration              # The mapping for the second required column 'duration'
       - event_type: event_type
       rows:
       - condition:                      # Dict(s): key = column name of the log input table, value = fullmatch regular expression to select the rows of interest
-          onset: '\d.*'                 # Select rows with numerical / non-empty onsets
+          event_type: '.*'
       time:
         cols: ['(?i).*time.*', '(?i).*duration.*', '(?i).*onset.*', '(?i).*start.*', '(?i).*stop.*', '.*\.rt']
 
@@ -1247,9 +1248,6 @@ Logdata:
     meta: &free_func_meta
       TaskName:
     events: &free_events
-      rows:                             # Rows that are included in the output table
-      - condition:                      # Dict(s): key = column name of the log input table, value = fullmatch regular expression to select the rows of interest
-          onset: '\d.*'                 # Select rows with numerical / non-empty onsets
       time:
         cols: ['(?i).*time.*', '(?i).*duration.*', '(?i).*onset.*', '(?i).*start.*', '(?i).*stop.*']

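The new `expand` setting targets PsychoPy columns such as `scannerPulse.rt`, where a single cell holds the string representation of a list (e.g. one reaction time per scanner pulse). The plugin parses such cells and spreads the items over numbered columns; in pivot mode it additionally rewrites names like `scannerPulse.rt.started_0` to `scannerPulse.rt_0.started` so every item surfaces as its own event onset. A rough sketch of the core transformation on toy data (the real implementation lives in events2bids.py below):

    import ast
    import pandas as pd

    df = pd.DataFrame({'scannerPulse.rt': ['[0.1, 2.1]', None]})

    # Parse the string representation of each list into an actual Python list
    ds  = df['scannerPulse.rt'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else [])
    df_ = ds.apply(pd.Series).add_prefix('scannerPulse.rt_')    # One numbered column per array item

    print(df_.columns.tolist())     # ['scannerPulse.rt_0', 'scannerPulse.rt_1']
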
bidscoin/heuristics/bidsmap_sst.yaml

Lines changed: 2 additions & 4 deletions
@@ -1163,13 +1163,14 @@ Psychopy:
     events: &psychopy_events
       parsing:                          # The settings to parse the source table from the log file
         table: [long-wide, pivot, 1]    # The raw source table or a pivoted 'onset', 'duration', 'event_type' version
+        expand: scannerPulse.rt         # Expands lists into columns for each array item
       columns:                          # Columns that are included in the output table, i.e. {output column: input column}
       - onset: onset                    # The mapping for the first required column 'onset'
       - duration: duration              # The mapping for the second required column 'duration'
       - event_type: event_type
       rows:
       - condition:                      # Dict(s): key = column name of the log input table, value = fullmatch regular expression to select the rows of interest
-          onset: '\d.*'                 # Select rows with numerical / non-empty onsets
+          event_type: '.*'
       time:
         cols: ['(?i).*time.*', '(?i).*duration.*', '(?i).*onset.*', '(?i).*start.*', '(?i).*stop.*', '.*\.rt']
 
@@ -1249,9 +1250,6 @@ Logdata:
     meta: &free_func_meta
       TaskName:
     events: &free_events
-      rows:                             # Rows that are included in the output table
-      - condition:                      # Dict(s): key = column name of the log input table, value = fullmatch regular expression to select the rows of interest
-          onset: '\d.*'                 # Select rows with numerical / non-empty onsets
       time:
         cols: ['(?i).*time.*', '(?i).*duration.*', '(?i).*onset.*', '(?i).*start.*', '(?i).*stop.*']

bidscoin/plugins/__init__.py

Lines changed: 10 additions & 10 deletions
@@ -245,33 +245,33 @@ def eventstable(self) -> pd.DataFrame:
         df = self.logtable.copy()       # Ensure we do not change the source data
 
         # Convert the timing values to seconds (with maximally 4 digits after the decimal point)
-        timecols = list(set([col for col in df.columns for pattern in self.time.cols if re.fullmatch(pattern, col)]))
+        timecols = list(set(col for col in df for pattern in self.time.cols if re.fullmatch(pattern, col)))
         df[timecols] = (df[timecols].apply(pd.to_numeric, errors='coerce') / self.time.unit).round(4)
 
         # Take the logtable columns of interest and from now on use the BIDS column names
-        df = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol in df.columns]]
-        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol in df.columns]
-        if 'onset' not in df.columns: df.insert(0, 'onset', None)
-        if 'duration' not in df.columns: df.insert(1, 'duration', None)
+        df = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol in df]]
+        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol in df]
+        if 'onset' not in df: df.insert(0, 'onset', None)
+        if 'duration' not in df: df.insert(1, 'duration', None)
 
         # Set the clock at zero at the start of the experiment
         if self.time.start:
-            start = pd.Series([True] * len(df))
+            start = pd.Series([True] * len(df), index=df.index)
             for column, value in self.time.start.items():
-                if column in self.logtable.columns:
+                if column in self.logtable:
                     start &= (self.logtable[column].astype(str) == str(value))
             if start.any():
                 LOGGER.bcdebug(f"Resetting clock offset: {df['onset'][start].iloc[0]}")
                 df['onset'] -= df['onset'][start].iloc[0]   # Take the time of the first occurrence as zero
 
         # Loop over the row groups to filter/edit the rows
-        rows = pd.Series([len(self.rows) == 0] * len(df))   # All rows are True if no row expressions were specified
+        rows = pd.Series([len(self.rows) == 0] * len(df), index=df.index)   # All rows are True if no row expressions were specified
         for group in self.rows:         # Within a group the expressions are AND'ed, between groups they are OR'ed
 
-            rowgroup = pd.Series([True] * len(df))
+            rowgroup = pd.Series([True] * len(df), index=df.index)
             for column, pattern in (group.get('condition') or {}).items():
 
-                if column not in self.logtable.columns:
+                if column not in self.logtable:
                     LOGGER.bcdebug(f"Unknown condition column: {column}")
                     continue

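The `index=df.index` additions are more than cosmetic: after the new expand/pivot steps the log table no longer carries a default 0..n-1 RangeIndex, and a boolean mask built without the matching index cannot be used to select or combine rows. A small sketch of the failure mode (toy data, not from the repo):

    import pandas as pd

    df = pd.DataFrame({'onset': [1.0, 2.0, 3.0]}, index=[2, 5, 7])   # Non-default index, as after pivoting

    bad  = pd.Series([True] * len(df))                    # Index 0, 1, 2 -- does not align with df
    good = pd.Series([True] * len(df), index=df.index)    # Index 2, 5, 7 -- aligns with df

    print(df[good])    # Selects all three rows; df[bad] raises an unalignable-indexer IndexingError
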
bidscoin/plugins/events2bids.py

Lines changed: 29 additions & 19 deletions
@@ -1,5 +1,5 @@
 """The events2bids plugin converts neurobs Presentation logfiles to event.tsv files"""
-
+import ast
 import logging
 import json
 import dateutil.parser
 
@@ -310,46 +310,56 @@ def __init__(self, sourcefile: Path, data: dict, options: dict):
         else:
             LOGGER.debug(f"Cannot read/parse {sourcefile}")
             self._sourcetable = pd.DataFrame()
-        self._sourcecols = self._sourcetable.columns
-        """Store the original column names"""
 
     @property
     def logtable(self) -> pd.DataFrame:
         """Returns the Psychopy log-table"""
 
+        table = self.parsing.get('table', ['long-wide', 'pivot', 1])
+        table = table[table[-1]]
+
         # Start with a fresh data frame
-        df = self._sourcetable
-        df.columns = self._sourcecols
+        df = self._sourcetable.copy()
         if not len(df):
             return df
 
+        # Expand the array items
+        try:
+            for expand in set(col for col in df if re.fullmatch(self.parsing.get('expand') or '', col)):
+                ds  = df[expand].apply(lambda x: ast.literal_eval(x) if isinstance(x,str) and x.startswith('[') else [])    # Convert string representation of lists into actual Python lists
+                df_ = ds.apply(pd.Series).add_prefix(f"{expand}{'.started' if '.' in expand and table=='pivot' else ''}_")  # Append `.started` to pivot the data into the onset column
+                if '.' in expand:                                               # Time columns should have a `.` in their name
+                    df_ = df_.rename(columns=lambda col: re.sub(r'(.*)\.(\w+)_(\d+)', r'\1_\3.\2', col))                    # Put e.g. `.rt` or `.started` back at the end
+                if not df_.empty:
+                    df = pd.concat([df.drop(columns=[expand]), df_], axis=1)
+        except re.error as pattern_error:
+            LOGGER.warning(f"The expand pattern {self.parsing.get('expand')} is invalid\n{pattern_error}")
+
         # Use the raw source data
-        table = self.parsing.get('table', ['long-wide', 'pivot', 1])
-        table = table[table[-1]]
         if table == 'long-wide':
             pass
 
         # Create a pivoted dataframe with 'onset', 'duration' and 'event_type' columns
         elif table == 'pivot':
 
-            df_piv = pd.DataFrame(columns=['onset', 'duration', 'event_type'])
-
             # Extract event column names without '.started' suffixes
-            events = sorted(set(col.split('.')[0] for col in df.columns if '.started' in col))
+            events = set(col.rsplit('.',1)[0] for col in df if col.endswith('.started'))
 
             # Create new DataFrame with 'onset', 'duration', and 'event_type'
+            df_piv = pd.DataFrame(columns=['onset', 'duration', 'event_type'])      # Collects all pivoted event data
             for event in events:
-                onset = df[(started := f"{event}.started")]                         # Get the onset times
-                if (stopped := f"{event}.stopped") in df.columns:
+                onset = df[(started := f"{event}.started")]                         # Get the onset times
+                if (stopped := f"{event}.stopped") in df:
                     duration = df[stopped] - df[started]
                 else:
-                    duration = pd.Series([float('nan')] * len(df))                  # Use NaN for missing `.stopped`
-                event_type = [event] * len(df)                                      # Store the event name
-                timecols = list(set([col for col in df.columns for pattern in self.time.cols if re.fullmatch(pattern, col)
-                                     and col not in df_piv.columns and not col.endswith(('.started', '.stopped'))]))
-                df_piv = pd.concat([df_piv.dropna(axis=1, how='all'),
-                                    pd.DataFrame({'onset': onset, 'duration': duration, 'event_type': event_type}).dropna(axis=1, how='all'),
-                                    df[timecols].dropna(axis=1, how='all')], ignore_index=True)
+                    duration = pd.Series([float('nan')] * len(df), index=df.index)
+                df_piv_ = pd.DataFrame({'onset': onset, 'duration': duration, 'event_type': [event]*len(df)}, index=df.index).dropna(subset=['onset'])
+                df_misc = df.filter(regex=r'^(?!.*\.(started|stopped)$)').loc[df_piv_.index,:]  # Drop all columns that end with '.started', '.stopped'
+                if not df_piv_.empty:                                               # Only concatenate if df_piv_ has data
+                    if df_piv.empty:
+                        df_piv = pd.concat([df_piv_, df_misc], axis=1)              # Re-initialize df_piv / avoid future warnings below about concatenating empty frames
+                    else:
+                        df_piv = pd.concat([df_piv, pd.concat([df_piv_, df_misc], axis=1)])
             df = df_piv.sort_values(by='onset')
 
         else:

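The subtlest line in the expansion code is the `re.sub` rename. The expanded columns are first prefixed as e.g. `scannerPulse.rt.started_0` (pivot mode) or `scannerPulse.rt_0` (long-wide mode), and the substitution then moves the dotted suffix back behind the item number so that the `.started` endings feed the pivot logic and `.rt` endings still match the `.*\.rt` time-column patterns. Checking the pattern in isolation:

    import re

    for col in ('scannerPulse.rt.started_0', 'scannerPulse.rt_3'):
        print(re.sub(r'(.*)\.(\w+)_(\d+)', r'\1_\3.\2', col))
    # scannerPulse.rt_0.started   (pivot mode: each pulse becomes its own event onset)
    # scannerPulse_3.rt           (long-wide mode: `.rt` stays at the end for the time columns)
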
bidscoin/utilities/physio.py

Lines changed: 1 addition & 1 deletion
@@ -371,7 +371,7 @@ def physio2tsv(physio: dict, tsvfile: Union[str, Path]):
 
     # Add each trace to a data table and save the table as a BIDS-compliant gzipped tsv file
     physiotable = pd.DataFrame(columns=[key for key in physio if key not in ('UUID','ScanDate','Freq','SliceMap','ACQ','Meta')])
-    for key in physiotable.columns:
+    for key in physiotable:
         physiotable[key] = physio[key]
     LOGGER.verbose(f"Writing physiological traces to: '{tsvfile}'")
     physiotable.to_csv(tsvfile, header=False, index=False, sep='\t', compression='infer')
