|
1 | | -import os |
| 1 | +import os |
2 | 2 | import pandas as pd |
3 | | -import numpy as np |
| 3 | +import json |
4 | 4 |
|
5 | | - |
6 | | - |
def _subdirs(parent):
    """Yield ``(name, path)`` for each sub-directory of *parent*, sorted by name.

    Yields nothing when *parent* itself is missing or is not a directory, so
    callers do not need a separate existence check.
    """
    if not os.path.isdir(parent):
        return
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated JSON stable between runs.
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        if os.path.isdir(path):
            yield name, path


def _first_datetime(csv_path):
    """Return the first value of the ``datetime`` column of *csv_path*, or None.

    None is returned when the file is empty, has no ``datetime`` column, has
    no data rows, or the first value is missing.
    """
    try:
        # Only the first row is needed, so avoid parsing the whole file.
        frame = pd.read_csv(csv_path, nrows=1)
    except pd.errors.EmptyDataError:
        return None
    if 'datetime' not in frame.columns or frame.empty:
        return None
    value = frame['datetime'].iloc[0]
    if pd.isna(value):
        return None
    # numpy scalars (e.g. int64) are not JSON serializable; unwrap them to
    # the equivalent built-in Python type.
    return value.item() if hasattr(value, 'item') else value


def _names_session(filename, session_tag):
    """Return True if *filename* contains *session_tag* as a complete token.

    An occurrence only counts when it is not immediately followed by another
    letter or digit, so ``ses-1`` does not match ``ses-10``.
    """
    start = filename.find(session_tag)
    while start != -1:
        end = start + len(session_tag)
        if end == len(filename) or not filename[end].isalnum():
            return True
        start = filename.find(session_tag, start + 1)
    return False


def _session_entries(task_name, task_path):
    """Build the per-session entries for one task folder.

    Returns a dict keyed ``TASKNAME_ses-SESSION`` with one entry per CSV file
    found in ``<task_path>/data``; the dict is empty when that folder is absent.
    """
    data_path = os.path.join(task_path, 'data')
    if not os.path.isdir(data_path):
        return {}

    # List the plot folder once per task instead of once per CSV.
    plots_path = os.path.join(task_path, 'plot')
    png_names = []
    if os.path.isdir(plots_path):
        png_names = sorted(
            name for name in os.listdir(plots_path) if name.endswith('.png')
        )

    entries = {}
    for csv_filename in sorted(os.listdir(data_path)):
        if not csv_filename.endswith('.csv'):
            continue

        # Session and category are the last two '_'-separated fields of the
        # file name (..._ses-<session>_cat-<category>.csv).
        parts = csv_filename.split('_')
        if len(parts) < 2:
            # The name carries no session field; skip it rather than crash.
            continue
        session_value = parts[-2].replace('ses-', '')
        category_value = parts[-1].replace('.csv', '').replace('cat-', '')

        session_tag = f"ses-{session_value}"
        entries[f"{task_name}_{session_tag}"] = {
            'date': _first_datetime(os.path.join(data_path, csv_filename)),
            'category': category_value,
            'png_paths': [
                os.path.join(plots_path, name)
                for name in png_names
                if _names_session(name, session_tag)
            ],
            'session': session_value,
        }
    return entries


def create_json(data_folder, out_file='data.json'):
    """
    Construct a master list of all tasks grouped by Subject ID and save it as JSON.

    The folder layout walked is
    ``<data_folder>/<project>/<site>/<subject_id>/<task_name>/{data,plot}``
    where ``<project>`` is 'int' or 'obs'. Each CSV in a task's ``data`` folder
    is treated as one session and stored as a separate task entry keyed
    ``TASKNAME_ses-SESSION``.

    Args:
        data_folder: Root folder containing the 'int' and/or 'obs' folders.
            A project folder that does not exist is skipped.
        out_file: Path of the JSON file to write.

    Returns:
        Dictionary of the form::

            subject_id: {
                'site': str,
                'project': str,          # 'int' or 'obs'
                'tasks': {
                    'TASKNAME_ses-SESSION': {
                        'date': first 'datetime' value of the CSV, or None,
                        'category': str,
                        'png_paths': sorted list of this session's PNG paths,
                        'session': str,
                    }
                }
            }
    """
    master_data = {}

    for project in ('int', 'obs'):
        project_path = os.path.join(data_folder, project)
        for site, site_path in _subdirs(project_path):
            for subject_id, subject_path in _subdirs(site_path):
                # The first project/site in which a subject is seen wins.
                subject = master_data.setdefault(subject_id, {
                    'site': site,
                    'project': project,
                    'tasks': {},
                })
                for task_name, task_path in _subdirs(subject_path):
                    subject['tasks'].update(
                        _session_entries(task_name, task_path)
                    )

    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(master_data, f, indent=2)

    return master_data
0 commit comments