66import re
77import time
88import datetime
9+
# Silence pandas' SettingWithCopyWarning; the script assigns into slices on purpose.
pd.set_option("mode.chained_assignment", None)
1011
1112def convert_to_list_rt (series ):
@@ -33,27 +34,31 @@ def convert_to_list_resp(series):
3334 return resp_list
3435
start = time.time()

# Session label (e.g. "s1_r1") selecting which wave of data to process;
# supplied on the command line.
session = sys.argv[1]

input_dataset_path = "/home/data/NDClab/datasets/thrive-dataset/"
output_dataset_path = "/home/data/NDClab/analyses/thrive-theta-ddm/"
data_path = "sourcedata/checked/"
sub_path = f"{session}/psychopy/"
output_path = f"derivatives/behavior/{session}/"

# Timestamp that makes the log / summary filenames unique per run.
date_time = datetime.datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
# Redirect every print() to a run log. The handle is intentionally never
# closed: it is the process-wide stdout for the script's whole lifetime.
sys.stdout = open(f"{output_dataset_path}{output_path}{date_time}_log.txt", "wt")

n_blocks = 20
n_trials = 40
valid_rt_thresh = 0.150  # seconds; faster responses are treated as invalid

# One glob hit per psychopy file; the subject ID is the digit run inside the
# "sub-XXXX" path component, 4th from the end: .../sub-X/<session>/psychopy/<file>.
# NOTE(review): the [-4] index assumes glob never yields doubled "/" separators
# in these paths -- confirm if the trailing slashes on the path constants change.
sub_folders = glob.glob(f"{input_dataset_path}/{data_path}/sub-*/{sub_path}/*")
subjects = sorted({re.findall(r'\d+', item.split("/")[-4])[0] for item in sub_folders})
print(subjects)

# processing_log accumulates one entry per subject per column; "sub" first,
# then "success", then every summary column (added elsewhere in the script).
processing_log = dict()
summary_columns = [
    "n_trials", "invalid_rt_percent", "skipped_percent",
    "acc", "acc_con", "acc_incon", "rt_con", "rt_incon", "rt_corr", "rt_err",
    "rt_con_log", "rt_incon_log", "rt_corr_log", "rt_err_log",
    "pes", "pea", "peri_acc", "peri_rt", "6_or_more_err",
]
processing_log["sub"] = []
@@ -72,17 +77,19 @@ def convert_to_list_resp(series):
7277 processing_log ["sub" ].append (sub )
7378 subject_folder = (input_dataset_path + data_path + "sub-" + sub + os .sep + sub_path )
7479 num_files = len (os .listdir (subject_folder ))
75-
76- if (num_files != 3 ) and (sub not in ["3000124" , "3000008" , "3000014" ]):
80+ if ((num_files != 3 ) and (sub not in ["3000124" , "3000008" , "3000014" ]) and session == "s1_r1" ) or (np .any (["deviation" in i for i in os .listdir (subject_folder )])):
7781 processing_log ["success" ].append (0 )
7882 print ("sub-{} has a deviation in psychopy data ({} files), skipping ..." .format (sub , num_files ))
7983 [processing_log [i ].append (np .nan ) for i in list (processing_log .keys ())[2 :]]
8084 pass
85+ # elif:
86+ # os.path.exists(f"{output_dataset_path}{output_path}sub-{sub}_trial_data.csv"):
87+ # print(f"sub-{sub} is already processed! Skipping...")
8188 else :
8289 print ("Processing sub-{}..." .format (sub ))
8390 processing_log ["success" ].append (1 )
8491
85- pattern = "{} sub-{}_arrow-alert-v1-*_psychopy_s1_r1_e1 .csv". format ( subject_folder , sub )
92+ pattern = f" { subject_folder } / sub-{ sub } _arrow-alert-v1-*_psychopy_ { session } _e1 .csv"
8693 filename = glob .glob (pattern )
8794 data = pd .read_csv (filename [0 ])
8895 start_index = data ["task_blockText.started" ].first_valid_index ()
@@ -218,9 +225,13 @@ def convert_to_list_resp(series):
218225 processing_log ["acc_con" + prefix ].append (np .round (condition_data [condition_data ["congruent" ] == 1 ].accuracy .mean (), 3 ))
219226 processing_log ["acc_incon" + prefix ].append (np .round (condition_data [condition_data ["congruent" ] == 0 ].accuracy .mean (), 3 ))
220227 processing_log ["rt_con" + prefix ].append (np .round (condition_data [(condition_data ["congruent" ] == 1 ) & (condition_data ["accuracy" ] == 1 )]["rt" ].mean () * 1000 , 3 ))
228+ processing_log ["rt_con_log" + prefix ].append (np .round (np .log (condition_data [(condition_data ["congruent" ] == 1 ) & (condition_data ["accuracy" ] == 1 )]["rt" ]).mean () * 1000 , 3 ))
221229 processing_log ["rt_incon" + prefix ].append (np .round (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 1 )]["rt" ].mean () * 1000 , 3 ))
230+ processing_log ["rt_incon_log" + prefix ].append (np .round (np .log (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 1 )]["rt" ]).mean () * 1000 , 3 ))
222231 processing_log ["rt_corr" + prefix ].append (np .round (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 1 )]["rt" ].mean () * 1000 , 3 ))
232+ processing_log ["rt_corr_log" + prefix ].append (np .round (np .log (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 1 )]["rt" ]).mean () * 1000 , 3 ))
223233 processing_log ["rt_err" + prefix ].append (np .round (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 0 )]["rt" ].mean () * 1000 , 3 ))
234+ processing_log ["rt_err_log" + prefix ].append (np .round (np .log (condition_data [(condition_data ["congruent" ] == 0 ) & (condition_data ["accuracy" ] == 0 )]["rt" ]).mean () * 1000 , 3 ))
224235 condition_data = condition_data [(condition_data ["pre_valid_rt" ] == 1 ) & (condition_data ["pre_extra_resp" ] == 0 ) & (condition_data ["pre_no_resp" ] == 0 )]
225236 processing_log ["pes" + prefix ].append (np .round (
226237 np .log (
@@ -277,14 +288,14 @@ def convert_to_list_resp(series):
277288
278289 print (f"sub-{ sub } has been processed" )
279290
# Per-run summary table, timestamped so successive runs never overwrite each other.
pd.DataFrame(processing_log).to_csv(
    f"{output_dataset_path}{output_path}summary_{date_time}.csv", index=False
)

# Concatenate every per-subject trial CSV into one long-format dataframe.
# (The filter on "sub-" keeps the timestamped summary/full_df files out.)
per_subject_files = sorted(
    name for name in os.listdir(f"{output_dataset_path}{output_path}") if "sub-" in name
)
list_of_ind_csv = [
    pd.read_csv(f"{output_dataset_path}{output_path}{name}") for name in per_subject_files
]
full_df = pd.concat(list_of_ind_csv)
# full_df = full_df[(full_df["pre_accuracy"] == 1) | (full_df["pre_accuracy"] == 0)]
full_df.to_csv(f"{output_dataset_path}{output_path}full_df_{date_time}.csv", index=False)

end = time.time()
print(f"Executed time {np.round(end - start, 2)}s")
0 commit comments