Skip to content

Commit a843146

Browse files
committed
fixed qc file to use glob
1 parent b63d97d commit a843146

File tree

1 file changed

+59
-70
lines changed

1 file changed

+59
-70
lines changed

code/utils/qc.py

Lines changed: 59 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,16 @@ def qc(self) -> None:
6565
for entry in os.listdir(self.base_dir):
6666
sub_path = os.path.join(self.base_dir, entry)
6767
if not os.path.isdir(sub_path) or not entry.startswith("sub-"):
68-
continue # Skip non-subject folders
68+
continue
6969

7070
accel_dir = os.path.join(sub_path, "accel")
7171
if not os.path.isdir(accel_dir):
72-
continue # No accel folder, skip
72+
continue
7373

7474
# Iterate over session folders inside the accel directory
7575
for session_folder in os.listdir(accel_dir):
7676
if not session_folder.startswith("ses"):
77-
continue # Not a session folder
77+
continue
7878

7979
ses_path = os.path.join(accel_dir, session_folder)
8080
if not os.path.isdir(ses_path):
@@ -85,76 +85,65 @@ def qc(self) -> None:
8585

8686
# Now construct path to results
8787
results_dir = os.path.join(ses_path, "output_accel", "results")
88-
8988
if not os.path.isdir(results_dir):
90-
continue # Results folder missing
91-
92-
# 1. Locate the QC report file
93-
qc_file = os.path.join(results_dir, "QC", "data_quality_report.csv")
94-
if not os.path.isfile(qc_file):
95-
# Skip if QC file missing
96-
continue
97-
98-
# 2. Locate the person summary CSV (filename starts with 'part5_personsummary')
99-
person_file = None
100-
for fname in os.listdir(results_dir):
101-
if fname.startswith("part5_personsummary") and fname.endswith(".csv"):
102-
person_file = os.path.join(results_dir, fname)
103-
break
104-
if person_file is None:
105-
# Skip if person summary missing
106-
continue
107-
108-
# 3. Locate the day summary CSV (filename starts with 'part5_daysummary')
109-
day_file = None
110-
for fname in os.listdir(results_dir):
111-
if fname.startswith("part5_daysummary") and fname.endswith(".csv"):
112-
day_file = os.path.join(results_dir, fname)
113-
break
114-
if day_file is None:
115-
# Skip if day summary missing
116-
continue
117-
118-
119-
# Extract subject ID and session from the QC file’s 'filename' column
120-
# and retrieve the relevant metrics
121-
dfs = [qc_file, person_file, day_file]
122-
metrics, sub, ses = self.extract_metrics(dfs)
123-
124-
# Unpack metrics
125-
cal_err, h_considered, valid_days, clean_code_series, calendar_date= metrics
126-
127-
# Run each QC check, which will append/update self.csv_path
128-
self.cal_error_check(cal_err, sub, ses)
129-
self.h_considered_check(h_considered, sub, ses)
130-
self.valid_days_check(sub, ses)
131-
self.cleaning_code_check(clean_code_series, calendar_date, sub, ses)
132-
133-
134-
135-
# Clean up DataFrames/variables to free memory before next iteration
136-
try:
137-
del metrics, cal_err, h_considered, valid_days, clean_code_series
138-
del dfs, person_file, day_file, qc_file
139-
del sub, ses
140-
except UnboundLocalError:
141-
# If any variable wasn’t set, ignore
142-
pass
143-
144-
# build the sub_path/accel/output_accel folder here to create plots
145-
all_ses_dir = os.path.join(sub_path,
146-
"accel",
147-
"output_accel",
148-
"results")
149-
person = pd.read_csv(os.path.join(all_ses_dir, "part5_personsummary_MM_L40M100V400_T5A5.csv"))
150-
day = pd.read_csv(os.path.join(all_ses_dir, "part5_daysummary_MM_L40M100V400_T5A5.csv"))
151-
plotter = ACT_PLOTS(sub, ses, person=person, day=day)
152-
plotter.summary_plot()
153-
plotter.day_plots()
89+
continue
90+
91+
# 1. QC report is still fixed
92+
qc_file = os.path.join(results_dir, "QC", "data_quality_report.csv")
93+
if not os.path.isfile(qc_file):
94+
continue
95+
96+
# 2. Locate the person summary using glob for MM
97+
person_matches = glob.glob(os.path.join(
98+
results_dir,
99+
"part5_personsummary_MM*.csv"
100+
))
101+
if not person_matches:
102+
continue
103+
person_file = person_matches[0]
104+
105+
# 3. Locate the day summary using glob for MM
106+
day_matches = glob.glob(os.path.join(
107+
results_dir,
108+
"part5_daysummary_MM*.csv"
109+
))
110+
if not day_matches:
111+
continue
112+
day_file = day_matches[0]
113+
114+
# Extract subject/session and metrics
115+
dfs = [qc_file, person_file, day_file]
116+
metrics, sub, ses = self.extract_metrics(dfs)
117+
cal_err, h_considered, valid_days, clean_code_series, calendar_date = metrics
118+
119+
# Run QC checks
120+
self.cal_error_check(cal_err, sub, ses)
121+
self.h_considered_check(h_considered, sub, ses)
122+
self.valid_days_check(sub, ses)
123+
self.cleaning_code_check(clean_code_series, calendar_date, sub, ses)
124+
125+
# Clean up
126+
try:
127+
del metrics, cal_err, h_considered, valid_days, clean_code_series
128+
del dfs, person_file, day_file, qc_file
129+
del sub, ses
130+
except UnboundLocalError:
131+
pass
132+
133+
# After per-session QC, make summary plots using the MM files
134+
all_ses_dir = os.path.join(sub_path, "accel", "output_accel", "results")
135+
person_glob = glob.glob(os.path.join(all_ses_dir, "part5_personsummary_MM*.csv"))
136+
day_glob = glob.glob(os.path.join(all_ses_dir, "part5_daysummary_MM*.csv"))
137+
if person_glob and day_glob:
138+
person = pd.read_csv(person_glob[0])
139+
day = pd.read_csv(day_glob[0])
140+
plotter = ACT_PLOTS(sub, ses, person=person, day=day)
141+
plotter.summary_plot()
142+
plotter.day_plots()
143+
154144
# create the json file used in the application
155145
create_json('plots')
156-
157-
# End of qc loop
146+
# End of qc loop
158147

159148

160149
def extract_metrics(self, dfs: list) -> tuple:

0 commit comments

Comments
 (0)