More robust handling of events input data

marcelzwiers · marcelzwiers · commit 0c5682394349 · 2025-02-11T17:09:14.000+01:00
diff --git a/bidscoin/bidseditor.py b/bidscoin/bidseditor.py
@@ -6,6 +6,7 @@
 import copy
 import webbrowser
 import ast
+import re
 import json
 import csv
 import nibabel as nib
@@ -1673,13 +1674,17 @@ def events_rows2run(self, rowindex: int, colindex: int):
         if mapping:
             try:
                 mapping = ast.literal_eval(mapping)  # Convert stringified dict back to dict
+                for key, pattern in mapping.items():
+                    re.compile(pattern)
                 LOGGER.verbose(f"User sets events['rows'][{rowindex}] to {mapping}' for {self.target_run}")
                 if rowindex == nrows - 1:
                     self.target_run.events['rows'].append({'condition' if colindex==0 else 'cast': mapping})
                 else:
                     self.target_run.events['rows'][rowindex]['condition' if colindex==0 else 'cast'] = mapping
-            except (ValueError, SyntaxError):
-                QMessageBox.warning(self, 'Input error', f"Please enter a valid '{mapping}' dictionary")
+            except (ValueError, SyntaxError) as dict_error:
+                QMessageBox.warning(self, 'Input error', f"Please enter a valid '{mapping}' dictionary\n\n{dict_error}")
+            except re.error as pattern_error:
+                QMessageBox.warning(self, 'Input error', f"Please enter a valid '{mapping}' pattern:\n\n{pattern_error}")
         elif colindex == 0 and rowindex < nrows - 1:                # Remove the row
             del self.target_run.events['rows'][rowindex]
         else:
@@ -1700,7 +1705,7 @@ def events_columns2run(self, rowindex: int, colindex: int):
         output = self.events_columns.item(rowindex, 1).text().strip() if self.events_columns.item(rowindex, 1) else ''
         nrows  = self.events_columns.rowCount()
 
-        if input and not output:
+        if colindex == 0 and input and not output:
             output = input
 
         if not input or input in self.target_run.eventsparser().logtable:
diff --git a/bidscoin/plugins/__init__.py b/bidscoin/plugins/__init__.py
@@ -193,22 +193,24 @@ def eventstable(self) -> pd.DataFrame:
 
         # Check the parser's data structure
         if not self.isvalid:
-            return pd.DataFrame()
+            pass
 
         df = copy.deepcopy(self.logtable)
 
         # Convert the timing values to seconds (with maximally 4 digits after the decimal point)
-        df[self.time['cols']] = (df[self.time['cols']].apply(pd.to_numeric, errors='coerce') / self.time['unit']).round(4)
+        timecols     = [col for col in self.time.get('cols',[]) if col in df.columns]
+        df[timecols] = (df[timecols].apply(pd.to_numeric, errors='coerce') / self.time['unit']).round(4)
 
         # Take the logtable columns of interest and from now on use the BIDS column names
-        df         = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol]]
-        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol]
+        df         = df.loc[:, [sourcecol for item in self.columns for sourcecol in item.values() if sourcecol in df.columns]]
+        df.columns = [eventscol for item in self.columns for eventscol, sourcecol in item.items() if sourcecol in df.columns]
 
         # Set the clock at zero at the start of the experiment
         if self.time.get('start'):
             start = pd.Series([True] * len(df))
             for column, value in self.time['start'].items():
-                start &= (self.logtable[column].astype(str) == str(value)).values
+                if column in self.logtable.columns:
+                    start &= (self.logtable[column].astype(str) == str(value)).values
             if start.any():
                 LOGGER.bcdebug(f"Resetting clock offset: {df['onset'][start.values].iloc[0]}")
                 df['onset'] -= df['onset'][start.values].iloc[0]  # Take the time of the first occurrence as zero
@@ -219,6 +221,9 @@ def eventstable(self) -> pd.DataFrame:
 
             for column, regex in group['condition'].items():
 
+                if column not in self.logtable.columns:
+                    continue
+
                 # Get the rows that match the expression, i.e. make them True
                 rowgroup = self.logtable[column].astype(str).str.fullmatch(str(regex))
 
@@ -294,7 +299,7 @@ def is_float(s):
         for name in set([name for item in self.columns for name in item.values()] + [name for item in self.rows for name in item['condition'].keys()] +
                         [*self.time.get('start', {}).keys()] + self.time.get('cols', [])):
             if name and name not in columns:
-                LOGGER.warning(f"Column '{name}' not found in the event table of {self}")
+                LOGGER.warning(f"Column '{name}' not found in the input table parsed from {self}")
                 valid = False
         if columns.duplicated().any():
             LOGGER.warning(f"Duplicate columns found: {columns}\n{self}")
diff --git a/bidscoin/plugins/events2bids.py b/bidscoin/plugins/events2bids.py
@@ -196,18 +196,18 @@ def logtable(self) -> pd.DataFrame:
 
         # Get the row indices to slice the event, stimulus, video or survey table
         df.columns = self._sourcecols
-        if self.options['table'] == 'event':
+        if self.options['table'].lower() == 'event':
             begin = 0
             end   = min(stimulus_header, video_header, survey_header)
-        elif self.options['table'] == 'stimulus':
+        elif self.options['table'].lower() == 'stimulus':
             df.columns = df.iloc[stimulus_header]
             begin = stimulus_header + 1
             end   = min(video_header, survey_header)
-        elif self.options['table'] == 'video':
+        elif self.options['table'].lower() == 'video':
             df.columns = df.iloc[video_header]
             begin = video_header + 1
             end   = survey_header
-        elif self.options['table'] == 'survey':
+        elif self.options['table'].lower() == 'survey':
             df.columns = df.iloc[survey_header]
             begin = survey_header + 1
             end   = nrows
@@ -222,11 +222,11 @@ def logtable(self) -> pd.DataFrame:
         for i, col in enumerate(df.columns):
             if pd.isna(col) or col == '':       # Check if the column name is NaN or an empty string
                 cols.append(new_col := f"unknown_{i}")
-                LOGGER.info(f"Renaming empty column name at index {i}: {col} -> {new_col}")
+                LOGGER.bcdebug(f"Renaming empty column name at index {i}: {col} -> {new_col}")
             elif col in dupl:                   # If duplicate, append the index number
                 dupl[col] += 1
                 cols.append(new_col := f"{col}_{dupl[col]}")
-                LOGGER.info(f"Renaming duplicate column name: {col} -> {new_col}")
+                LOGGER.bcdebug(f"Renaming duplicate column name: {col} -> {new_col}")
             else:                               # First occurrence of the column name, add it to dupl
                 dupl[col] = 0
                 cols.append(col)