Commit 26ab3da
Parent: 850cc42

Added changes so the QC category tuple is no longer written into the pathname, and ran a helper that either renames or removes each existing tuple-named export, depending on whether a normalized file already exists in the directory.

1,311 files changed: +123 / -93,972 lines

code/data_processing/save_utils.py

Lines changed: 83 additions & 0 deletions
@@ -1,7 +1,9 @@
 import os
+import re
 import pandas as pd
 import matplotlib.pyplot as plt
 from termcolor import cprint
+from pathlib import Path

 class SAVE_EVERYTHING:
     def __init__(self):
@@ -60,6 +62,87 @@ def save_plots(self, plots, task):
             plt.close(plot_obj.figure)


+def normalize_category_exports(
+    base_dir: str | Path = "data",
+    dry_run: bool = False,
+) -> dict[str, list]:
+    """
+    Rename tuple-suffixed QC CSV exports so filenames carry only the scalar category.
+
+    For each file like ``*_cat-(1, {...}).csv`` we either:
+    * rename it to ``*_cat-1.csv`` when no normalized file already exists, or
+    * delete the tuple version if the normalized file is already present.
+
+    Args:
+        base_dir: Root directory to scan (defaults to project ``data`` folder).
+        dry_run: When True, report planned actions without renaming/deleting.
+
+    Returns:
+        dict with keys ``renamed`` (list of (old, new) Paths), ``deleted`` (list of Paths),
+        and ``skipped`` (Paths that matched the pattern but could not be normalized).
+    """
+    base_path = Path(base_dir).expanduser()
+    if not base_path.exists():
+        return {"renamed": [], "deleted": [], "skipped": []}
+
+    matches = sorted(base_path.rglob("*.csv"))
+    renamed: list[tuple[Path, Path]] = []
+    deleted: list[Path] = []
+    skipped: list[Path] = []
+
+    for csv_path in matches:
+        name = csv_path.name
+        if "cat-" not in name:
+            continue
+        prefix_part, suffix_part = name.split("cat-", 1)
+        if not suffix_part:
+            continue
+
+        first_char = suffix_part[0]
+        if first_char == "(":
+            cat_match = re.match(r"\((\d+)", suffix_part)
+        elif first_char == "[":
+            cat_match = re.match(r"\[(\d+)", suffix_part)
+        else:
+            continue
+
+        if not cat_match:
+            skipped.append(csv_path)
+            continue
+
+        category = cat_match.group(1)
+        new_name = f"{prefix_part}cat-{category}{csv_path.suffix}"
+        target_path = csv_path.with_name(new_name)
+
+        if target_path.exists():
+            deleted.append(csv_path)
+            if not dry_run:
+                try:
+                    csv_path.unlink()
+                except FileNotFoundError:
+                    continue
+            continue
+
+        renamed.append((csv_path, target_path))
+        if not dry_run:
+            try:
+                csv_path.rename(target_path)
+            except FileNotFoundError:
+                continue
+
+    if renamed or deleted:
+        msg = (
+            f"Normalized QC exports: {len(renamed)} renamed, "
+            f"{len(deleted)} duplicates removed."
+        )
+        cprint(msg, "yellow")
+
+    if skipped:
+        cprint(f"Skipped {len(skipped)} files; inspect patterns.", "red")
+
+    return {"renamed": renamed, "deleted": deleted, "skipped": skipped}
+
+
 """

 7000s- UI Observational
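
The helper is meant as a one-off cleanup of exports written before this commit. A minimal usage sketch follows: preview with ``dry_run=True``, then apply. The import path assumes the script is run from the ``code`` directory so ``data_processing.save_utils`` resolves; that layout is an assumption, not part of the commit.

# One-off cleanup sketch: preview first, then apply.
# Assumes the working directory is `code/` so the import below resolves.
from data_processing.save_utils import normalize_category_exports

preview = normalize_category_exports(base_dir="data", dry_run=True)
print(len(preview["renamed"]), "files would be renamed;",
      len(preview["deleted"]), "tuple-named duplicates would be removed")

# Once the preview looks right, run again without dry_run to apply the changes.
result = normalize_category_exports(base_dir="data")
for old_path, new_path in result["renamed"]:
    print(old_path.name, "->", new_path.name)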

code/main_handler.py

Lines changed: 40 additions & 23 deletions
@@ -47,6 +47,21 @@ def __init__(self):
         atexit.register(self._persist_all_masters)
         self._skipped_subjects: list[dict[str, object]] = []

+    @staticmethod
+    def _normalize_category_value(category):
+        """Coerce QC categories to plain scalars so filenames remain clean."""
+        if category is None:
+            return None
+        if hasattr(category, "item"):
+            try:
+                category = category.item()
+            except Exception:
+                pass
+        try:
+            return int(category)
+        except (TypeError, ValueError):
+            return category
+
     def _atomic_to_csv(self, df: pd.DataFrame, path: Path, index: bool = False):
         """Write CSV atomically to avoid partial files."""
         tmp = path.with_suffix(path.suffix + ".tmp")
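
To see what the new helper returns for the values that used to leak into filenames, here is a small self-contained sketch of the same coercion logic; the function body is copied out of the class so it runs standalone, and numpy is imported only to illustrate the ``.item()`` branch (an assumption for the demo, not a new dependency).

import numpy as np

def normalize_category_value(category):
    # Mirror of the staticmethod above, for illustration only.
    if category is None:
        return None
    if hasattr(category, "item"):      # numpy/pandas scalars expose .item()
        try:
            category = category.item()
        except Exception:
            pass
    try:
        return int(category)           # e.g. "2" or 2.0 become the plain int 2
    except (TypeError, ValueError):
        return category                # non-coercible values pass through unchanged

print(normalize_category_value(np.int64(3)))        # 3, a plain Python int
print(normalize_category_value("2"))                 # 2
print(normalize_category_value(None))                # None
print(normalize_category_value((1, {"acc": 0.8})))   # tuple passes through; callers now unpack first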
@@ -137,16 +152,17 @@ def qc_cc_dfs(self, dfs, task):
             else:
                 session = None

+            acc_by: dict = {}
             try:
                 # --- Run QC + plots (kept as you had it) ---
                 if task in ["AF", "NF"]:
-                    category, _ = qc_instance.cc_qc(df, threshold=0.5)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5)
                     plot = plot_instance.af_nf_plot(df)
                 elif task in ["NNB", "VNB"]:
-                    category, _ = qc_instance.cc_qc(df, threshold=0.5)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5)
                     plot = plot_instance.nnb_vnb_plot(df)
                 else:
-                    category = qc_instance.cc_qc(df, threshold=0.5, TS=True)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5, TS=True)
                     plot = plot_instance.ats_nts_plot(df)
             except ValueError as err:
                 message = str(err)
@@ -166,18 +182,13 @@ def qc_cc_dfs(self, dfs, task):
                     continue
                 raise

-            categories.append([subject, category, df])
+            normalized_category = self._normalize_category_value(category)
+            categories.append([subject, normalized_category, df])
             plots.append([subject, plot])

-            # --- Compute metrics by condition using your helpers ---
-            # Use the column names from qc_instance so this is task-agnostic
-            acc_by = utils.get_acc_by_block_cond(
-                df,
-                block_cond_column_name=qc_instance.COND_COLUMN_NAME,
-                acc_column_name=qc_instance.ACC_COLUMN_NAME,
-                correct_symbol=qc_instance.CORRECT_SYMBOL,
-                incorrect_symbol=qc_instance.INCORRECT_SYMBOL,
-            )
+            # --- Metrics by condition ---
+            # Reuse QC accuracies and recompute RTs for the master tables.
+            acc_by = {cond: float(val) for cond, val in (acc_by or {}).items()}
             rt_by = utils.get_avg_rt(
                 df,
                 rt_column_name=qc_instance.RT_COLUMN_NAME,
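
The comprehension above is the only conversion the master tables need once ``cc_qc`` returns its per-condition accuracies directly. A tiny illustration with invented values (numpy is assumed only to mimic what the QC step might hand back):

import numpy as np

# Invented per-condition accuracies, as the QC step might return them.
acc_by = {"congruent": np.float64(0.91), "incongruent": np.float64(0.84)}
acc_by = {cond: float(val) for cond, val in (acc_by or {}).items()}
print(acc_by)  # {'congruent': 0.91, 'incongruent': 0.84} -- plain floats, safe to persist

# If the QC step yields nothing, the `or {}` guard keeps the same line a harmless no-op.
print({cond: float(val) for cond, val in (None or {}).items()})  # {}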
@@ -220,21 +231,23 @@ def qc_ps_dfs(self, dfs, task):
             ps_instance = PS_QC('response_time', 'correct', 1, 0, 'block_c', 30000)
             for df in dfs:
                 subject = df['subject_id'][1]
-                category = ps_instance.ps_qc(df, threshold=0.6,)
+                category, _ = ps_instance.ps_qc(df, threshold=0.6,)
                 if task == 'PC':
                     plot = plot_instance.lc_plot(df)
                 elif task == 'LC':
                     plot = plot_instance.lc_plot(df)
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         else:
             ps_instance = PS_QC('block_dur', 'correct', 1, 0, 'block_c', 125)
             for df in dfs:
                 subject = df['subject_id'][1]
-                category = ps_instance.ps_qc(df, threshold=0.6, DSST=True)
+                category, _ = ps_instance.ps_qc(df, threshold=0.6, DSST=True)
                 plot = plot_instance.dsst_plot(df)
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         save_instance = SAVE_EVERYTHING()
@@ -325,7 +338,7 @@ def qc_mem_dfs(self, dfs, task):
                     session = None

                 try:
-                    category = mem_instance.fn_sm_qc(df, threshold=0.5)
+                    category, _ = mem_instance.fn_sm_qc(df, threshold=0.5)
                     plot = plot_instance.fn_plot(df)
                 except ValueError as err:
                     message = str(err)
@@ -344,7 +357,8 @@ def qc_mem_dfs(self, dfs, task):
                         )
                         continue
                     raise
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])
         elif task in ['SM']:
             mem_instance = MEM_QC('response_time', 'correct', 1, 0, 'block_c', 2000)
@@ -367,7 +381,7 @@ def qc_mem_dfs(self, dfs, task):
                     session = None

                 try:
-                    category = mem_instance.fn_sm_qc(df, threshold=0.5)
+                    category, _ = mem_instance.fn_sm_qc(df, threshold=0.5)
                     plot = plot_instance.sm_plot(df)
                 except ValueError as err:
                     message = str(err)
@@ -386,7 +400,8 @@ def qc_mem_dfs(self, dfs, task):
                         )
                         continue
                     raise
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])
         save_instance = SAVE_EVERYTHING()
         save_instance.save_dfs(categories=categories, task=task)
@@ -503,7 +518,8 @@ def qc_wl_dfs(self, dfs, task):
                 }
                 self._upsert_wl_master(subject, session, upd)

-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         elif task == 'DWL':
@@ -521,7 +537,8 @@
                 upd = {'delay': counts_delay['delay'].iat[0]}
                 self._upsert_wl_master(subject, session, upd)

-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         # maybe: materialize wl_master back to columns if you prefer
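
Downstream, the point of appending ``normalized_category`` instead of the raw QC return value is that saved filenames end in a bare number. A before/after sketch of the naming; the exact filename template is an assumption, inferred from the ``*_cat-...`` pattern in the ``normalize_category_exports`` docstring.

import re
from pathlib import Path

# Hypothetical export written before this commit: the whole (category, accuracies)
# tuple ended up in the filename.
old = Path("data/sub-0123_task-AF_cat-(1, {'block_1': 0.92}).csv")

# The same extraction normalize_category_exports performs, shown on one name.
prefix, suffix = old.name.split("cat-", 1)
match = re.match(r"\((\d+)", suffix)
print(old.with_name(f"{prefix}cat-{match.group(1)}{old.suffix}"))
# data/sub-0123_task-AF_cat-1.csv  -- what new exports look like after this commit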
