Skip to content

Commit 90904a8

Browse files
committed
fixing session logic in create_json
1 parent d0107f6 commit 90904a8

File tree

1 file changed

+47
-89
lines changed

1 file changed

+47
-89
lines changed
Lines changed: 47 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,116 +1,74 @@
1-
import os
1+
import os
22
import pandas as pd
3-
import numpy as np
3+
import json
44

5-
6-
7-
def _has_session_tag(filename, tag):
    """Return True if *tag* occurs in *filename* as a complete token.

    A plain substring test would let ``ses-1`` match ``ses-10``; here the
    character following the tag must not be alphanumeric (or the tag must
    end the name).
    """
    start = filename.find(tag)
    while start != -1:
        end = start + len(tag)
        if end == len(filename) or not filename[end].isalnum():
            return True
        start = filename.find(tag, start + 1)
    return False


def _first_datetime(csv_path):
    """Return the first value of the 'datetime' column in *csv_path*, or None.

    None is returned when the file is empty, has no 'datetime' column, has
    no data rows, or the first value is missing.
    """
    try:
        # Only the first row is needed, so avoid loading the whole file.
        df = pd.read_csv(csv_path, nrows=1)
    except pd.errors.EmptyDataError:
        return None
    if 'datetime' not in df.columns or df.empty:
        return None
    value = df['datetime'].iloc[0]
    if pd.isna(value):
        return None
    # NumPy scalars (e.g. int64) are not JSON serializable; unwrap them.
    return value.item() if hasattr(value, 'item') else value


def _scan_task(task_name, task_path):
    """Build the per-session entries for one task folder.

    Returns a dict keyed ``TASKNAME_ses-SESSION``; it is empty when the
    task folder has no 'data' sub-directory.
    """
    data_path = os.path.join(task_path, 'data')
    plots_path = os.path.join(task_path, 'plot')
    if not os.path.isdir(data_path):
        return {}

    # List the plot folder once instead of once per CSV.
    png_names = []
    if os.path.isdir(plots_path):
        png_names = [f for f in os.listdir(plots_path) if f.endswith('.png')]

    entries = {}
    for csv_filename in sorted(os.listdir(data_path)):
        if not csv_filename.endswith('.csv'):
            continue

        # Session and category are taken from the last two '_'-separated
        # fields (presumably '..._ses-<SESSION>_cat-<CATEGORY>.csv').
        parts = csv_filename.split('_')
        if len(parts) < 2:
            # Stray CSV that does not follow the naming scheme: skip it
            # rather than abort the whole scan with an IndexError.
            continue
        session_value = parts[-2].replace('ses-', '')
        category_value = parts[-1].replace('.csv', '').replace('cat-', '')

        session_tag = f"ses-{session_value}"
        png_list = [
            os.path.join(plots_path, png)
            for png in png_names
            if _has_session_tag(png, session_tag)
        ]

        # NOTE: two CSVs sharing a session value map to the same key; the
        # one that sorts last wins.
        entries[f"{task_name}_{session_tag}"] = {
            'date': _first_datetime(os.path.join(data_path, csv_filename)),
            'category': category_value,
            'png_paths': sorted(png_list),
            'session': session_value,
        }
    return entries


def _subdirectories(path):
    """Return (name, full_path) pairs for the sub-directories of *path*, sorted by name."""
    pairs = []
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            pairs.append((name, full_path))
    return pairs


def create_json(data_folder, out_file='data.json'):
    """
    Constructs a master list of all tasks grouped by Subject ID at application startup,
    with each session saved as a separate task entry (keyed as TASKNAME_ses-SESSION).

    The tree walked is ``data_folder/<int|obs>/<site>/<subject>/<task>/``,
    where each task folder holds a 'data' directory of CSV files (one per
    session) and, optionally, a 'plot' directory of PNG files.

    Args:
        data_folder: Root folder containing the 'int' and/or 'obs' project
            directories. A missing project directory is skipped.
        out_file: Path of the JSON file to write.

    Returns:
        dict: ``{subject_id: {'site': str, 'project': 'int' | 'obs',
        'tasks': {task_key: {'date', 'category', 'png_paths', 'session'}}}}``.
        The same structure is written to *out_file*.
    """
    directories = ['int', 'obs']
    master_data = {}

    for directory in directories:
        dir_path = os.path.join(data_folder, directory)
        if not os.path.isdir(dir_path):
            continue

        # Directory listings are sorted so the output is deterministic.
        for site, site_path in _subdirectories(dir_path):
            for subject_id, subject_path in _subdirectories(site_path):
                # The first site/project seen for a subject is kept; tasks
                # from later occurrences are merged into the same entry.
                subject = master_data.setdefault(subject_id, {
                    'site': site,
                    'project': directory,
                    'tasks': {},
                })
                for task_name, task_path in _subdirectories(subject_path):
                    subject['tasks'].update(_scan_task(task_name, task_path))

    # write out
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(master_data, f, indent=2)

    return master_data

0 commit comments

Comments
 (0)