Commit 26ab3da
Parent: 850cc42

Added changes so the QC category tuple is no longer written into the pathname, and ran a helper that either renames or removes each existing tuple-named export, depending on whether a normalized file already exists in the directory.

1,311 files changed: +123 / -93,972 lines

code/data_processing/save_utils.py

Lines changed: 83 additions & 0 deletions
@@ -1,7 +1,9 @@
 import os
+import re
 import pandas as pd
 import matplotlib.pyplot as plt
 from termcolor import cprint
+from pathlib import Path

 class SAVE_EVERYTHING:
     def __init__(self):
@@ -60,6 +62,87 @@ def save_plots(self, plots, task):
             plt.close(plot_obj.figure)


+def normalize_category_exports(
+    base_dir: str | Path = "data",
+    dry_run: bool = False,
+) -> dict[str, list]:
+    """
+    Rename tuple-suffixed QC CSV exports so filenames carry only the scalar category.
+
+    For each file like ``*_cat-(1, {...}).csv`` we either:
+    * rename it to ``*_cat-1.csv`` when no normalized file already exists, or
+    * delete the tuple version if the normalized file is already present.
+
+    Args:
+        base_dir: Root directory to scan (defaults to project ``data`` folder).
+        dry_run: When True, report planned actions without renaming/deleting.
+
+    Returns:
+        dict with keys ``renamed`` (list of (old, new) Paths), ``deleted`` (list of Paths),
+        and ``skipped`` (Paths that matched the pattern but could not be normalized).
+    """
+    base_path = Path(base_dir).expanduser()
+    if not base_path.exists():
+        return {"renamed": [], "deleted": [], "skipped": []}
+
+    matches = sorted(base_path.rglob("*.csv"))
+    renamed: list[tuple[Path, Path]] = []
+    deleted: list[Path] = []
+    skipped: list[Path] = []
+
+    for csv_path in matches:
+        name = csv_path.name
+        if "cat-" not in name:
+            continue
+        prefix_part, suffix_part = name.split("cat-", 1)
+        if not suffix_part:
+            continue
+
+        first_char = suffix_part[0]
+        if first_char == "(":
+            cat_match = re.match(r"\((\d+)", suffix_part)
+        elif first_char == "[":
+            cat_match = re.match(r"\[(\d+)", suffix_part)
+        else:
+            continue
+
+        if not cat_match:
+            skipped.append(csv_path)
+            continue
+
+        category = cat_match.group(1)
+        new_name = f"{prefix_part}cat-{category}{csv_path.suffix}"
+        target_path = csv_path.with_name(new_name)
+
+        if target_path.exists():
+            deleted.append(csv_path)
+            if not dry_run:
+                try:
+                    csv_path.unlink()
+                except FileNotFoundError:
+                    continue
+            continue
+
+        renamed.append((csv_path, target_path))
+        if not dry_run:
+            try:
+                csv_path.rename(target_path)
+            except FileNotFoundError:
+                continue
+
+    if renamed or deleted:
+        msg = (
+            f"Normalized QC exports: {len(renamed)} renamed, "
+            f"{len(deleted)} duplicates removed."
+        )
+        cprint(msg, "yellow")
+
+    if skipped:
+        cprint(f"Skipped {len(skipped)} files; inspect patterns.", "red")
+
+    return {"renamed": renamed, "deleted": deleted, "skipped": skipped}
+
+
 """

 7000s- UI Observational
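
The helper is meant as a one-off cleanup of exports written before this commit. A minimal usage sketch follows: preview with ``dry_run=True``, then apply. The import path assumes the script is run from the ``code`` directory so ``data_processing.save_utils`` resolves; that layout is an assumption, not part of the commit.

# One-off cleanup sketch: preview first, then apply.
# Assumes the working directory is `code/` so the import below resolves.
from data_processing.save_utils import normalize_category_exports

preview = normalize_category_exports(base_dir="data", dry_run=True)
print(len(preview["renamed"]), "files would be renamed;",
      len(preview["deleted"]), "tuple-named duplicates would be removed")

# Once the preview looks right, run again without dry_run to apply the changes.
result = normalize_category_exports(base_dir="data")
for old_path, new_path in result["renamed"]:
    print(old_path.name, "->", new_path.name)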

code/main_handler.py

Lines changed: 40 additions & 23 deletions
@@ -47,6 +47,21 @@ def __init__(self):
         atexit.register(self._persist_all_masters)
         self._skipped_subjects: list[dict[str, object]] = []

+    @staticmethod
+    def _normalize_category_value(category):
+        """Coerce QC categories to plain scalars so filenames remain clean."""
+        if category is None:
+            return None
+        if hasattr(category, "item"):
+            try:
+                category = category.item()
+            except Exception:
+                pass
+        try:
+            return int(category)
+        except (TypeError, ValueError):
+            return category
+
     def _atomic_to_csv(self, df: pd.DataFrame, path: Path, index: bool = False):
         """Write CSV atomically to avoid partial files."""
         tmp = path.with_suffix(path.suffix + ".tmp")
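
To see what the new helper returns for the values that used to leak into filenames, here is a small self-contained sketch of the same coercion logic; the function body is copied out of the class so it runs standalone, and numpy is imported only to illustrate the ``.item()`` branch (an assumption for the demo, not a new dependency).

import numpy as np

def normalize_category_value(category):
    # Mirror of the staticmethod above, for illustration only.
    if category is None:
        return None
    if hasattr(category, "item"):      # numpy/pandas scalars expose .item()
        try:
            category = category.item()
        except Exception:
            pass
    try:
        return int(category)           # e.g. "2" or 2.0 become the plain int 2
    except (TypeError, ValueError):
        return category                # non-coercible values pass through unchanged

print(normalize_category_value(np.int64(3)))        # 3, a plain Python int
print(normalize_category_value("2"))                 # 2
print(normalize_category_value(None))                # None
print(normalize_category_value((1, {"acc": 0.8})))   # tuple passes through; callers now unpack first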
@@ -137,16 +152,17 @@ def qc_cc_dfs(self, dfs, task):
             else:
                 session = None

+            acc_by: dict = {}
             try:
                 # --- Run QC + plots (kept as you had it) ---
                 if task in ["AF", "NF"]:
-                    category, _ = qc_instance.cc_qc(df, threshold=0.5)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5)
                     plot = plot_instance.af_nf_plot(df)
                 elif task in ["NNB", "VNB"]:
-                    category, _ = qc_instance.cc_qc(df, threshold=0.5)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5)
                     plot = plot_instance.nnb_vnb_plot(df)
                 else:
-                    category = qc_instance.cc_qc(df, threshold=0.5, TS=True)
+                    category, acc_by = qc_instance.cc_qc(df, threshold=0.5, TS=True)
                     plot = plot_instance.ats_nts_plot(df)
             except ValueError as err:
                 message = str(err)
@@ -166,18 +182,13 @@ def qc_cc_dfs(self, dfs, task):
                     continue
                 raise

-            categories.append([subject, category, df])
+            normalized_category = self._normalize_category_value(category)
+            categories.append([subject, normalized_category, df])
             plots.append([subject, plot])

-            # --- Compute metrics by condition using your helpers ---
-            # Use the column names from qc_instance so this is task-agnostic
-            acc_by = utils.get_acc_by_block_cond(
-                df,
-                block_cond_column_name=qc_instance.COND_COLUMN_NAME,
-                acc_column_name=qc_instance.ACC_COLUMN_NAME,
-                correct_symbol=qc_instance.CORRECT_SYMBOL,
-                incorrect_symbol=qc_instance.INCORRECT_SYMBOL,
-            )
+            # --- Metrics by condition ---
+            # Reuse QC accuracies and recompute RTs for the master tables.
+            acc_by = {cond: float(val) for cond, val in (acc_by or {}).items()}
             rt_by = utils.get_avg_rt(
                 df,
                 rt_column_name=qc_instance.RT_COLUMN_NAME,
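
The comprehension above is the only conversion the master tables need once ``cc_qc`` returns its per-condition accuracies directly. A tiny illustration with invented values (numpy is assumed only to mimic what the QC step might hand back):

import numpy as np

# Invented per-condition accuracies, as the QC step might return them.
acc_by = {"congruent": np.float64(0.91), "incongruent": np.float64(0.84)}
acc_by = {cond: float(val) for cond, val in (acc_by or {}).items()}
print(acc_by)  # {'congruent': 0.91, 'incongruent': 0.84} -- plain floats, safe to persist

# If the QC step yields nothing, the `or {}` guard keeps the same line a harmless no-op.
print({cond: float(val) for cond, val in (None or {}).items()})  # {}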
@@ -220,21 +231,23 @@ def qc_ps_dfs(self, dfs, task):
             ps_instance = PS_QC('response_time', 'correct', 1, 0, 'block_c', 30000)
             for df in dfs:
                 subject = df['subject_id'][1]
-                category = ps_instance.ps_qc(df, threshold=0.6,)
+                category, _ = ps_instance.ps_qc(df, threshold=0.6,)
                 if task == 'PC':
                     plot = plot_instance.lc_plot(df)
                 elif task == 'LC':
                     plot = plot_instance.lc_plot(df)
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         else:
             ps_instance = PS_QC('block_dur', 'correct', 1, 0, 'block_c', 125)
             for df in dfs:
                 subject = df['subject_id'][1]
-                category = ps_instance.ps_qc(df, threshold=0.6, DSST=True)
+                category, _ = ps_instance.ps_qc(df, threshold=0.6, DSST=True)
                 plot = plot_instance.dsst_plot(df)
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         save_instance = SAVE_EVERYTHING()
@@ -325,7 +338,7 @@ def qc_mem_dfs(self, dfs, task):
                     session = None

                 try:
-                    category = mem_instance.fn_sm_qc(df, threshold=0.5)
+                    category, _ = mem_instance.fn_sm_qc(df, threshold=0.5)
                     plot = plot_instance.fn_plot(df)
                 except ValueError as err:
                     message = str(err)
@@ -344,7 +357,8 @@ def qc_mem_dfs(self, dfs, task):
                         )
                         continue
                     raise
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])
         elif task in ['SM']:
             mem_instance = MEM_QC('response_time', 'correct', 1, 0, 'block_c', 2000)
@@ -367,7 +381,7 @@ def qc_mem_dfs(self, dfs, task):
                     session = None

                 try:
-                    category = mem_instance.fn_sm_qc(df, threshold=0.5)
+                    category, _ = mem_instance.fn_sm_qc(df, threshold=0.5)
                     plot = plot_instance.sm_plot(df)
                 except ValueError as err:
                     message = str(err)
@@ -386,7 +400,8 @@ def qc_mem_dfs(self, dfs, task):
                         )
                         continue
                     raise
-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])
         save_instance = SAVE_EVERYTHING()
         save_instance.save_dfs(categories=categories, task=task)
@@ -503,7 +518,8 @@ def qc_wl_dfs(self, dfs, task):
                 }
                 self._upsert_wl_master(subject, session, upd)

-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         elif task == 'DWL':
@@ -521,7 +537,8 @@
                 upd = {'delay': counts_delay['delay'].iat[0]}
                 self._upsert_wl_master(subject, session, upd)

-                categories.append([subject, category, df])
+                normalized_category = self._normalize_category_value(category)
+                categories.append([subject, normalized_category, df])
                 plots.append([subject, plot])

         # maybe: materialize wl_master back to columns if you prefer
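
Downstream, the point of appending ``normalized_category`` instead of the raw QC return value is that saved filenames end in a bare number. A before/after sketch of the naming; the exact filename template is an assumption, inferred from the ``*_cat-...`` pattern in the ``normalize_category_exports`` docstring.

import re
from pathlib import Path

# Hypothetical export written before this commit: the whole (category, accuracies)
# tuple ended up in the filename.
old = Path("data/sub-0123_task-AF_cat-(1, {'block_1': 0.92}).csv")

# The same extraction normalize_category_exports performs, shown on one name.
prefix, suffix = old.name.split("cat-", 1)
match = re.match(r"\((\d+)", suffix)
print(old.with_name(f"{prefix}cat-{match.group(1)}{old.suffix}"))
# data/sub-0123_task-AF_cat-1.csv  -- what new exports look like after this commit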
