11import pandas as pd
22import os
33from simglucose .simulation .user_interface import simulate
4- from simglucose .simulation .scenario import CustomScenario
54from simglucose .simulation .scenario_gen import RandomScenario
65from simglucose .controller .basal_bolus_ctrller import BBController
76from meal_identification .datasets .dataset_operations import get_root_dir
87from datetime import datetime , timedelta
8+ import random
99
1010
1111def process_simulated_data (df ):
1212 """
1313 Process individual patient's glucose data into project-specific format.
14+ CHO -> food_g
15+ CGM -> bgl
16+ BG -> bgl_real
17+ Time -> date
1418
1519 Parameters
1620 ----------
@@ -28,9 +32,6 @@ def process_simulated_data(df):
2832
2933 # Add required columns
3034 processed_df ['msg_type' ] = ''
31- processed_df ['food_glycemic_index' ] = ''
32- processed_df ['affects_iob' ] = ''
33- processed_df ['affects_fob' ] = ''
3435 processed_df ['dose_units' ] = ''
3536
3637 # Map CGM to bgl column, Time to date and BG to bgl_real for reference only
@@ -44,74 +45,91 @@ def process_simulated_data(df):
4445 # Drop the CHO column
4546 processed_df = processed_df .drop (columns = ['CHO' ])
4647
48+ # Round values to 2 decimals and drop seconds from timestamps to save some space
49+ processed_df ['bgl_real' ] = processed_df ['bgl_real' ].round (2 )
50+ processed_df ['bgl' ] = processed_df ['bgl' ].round (2 )
51+ processed_df ['food_g' ] = processed_df ['food_g' ].round (2 )
52+ processed_df ['date' ] = pd .to_datetime (processed_df ['date' ]).dt .strftime ('%Y-%m-%d %H:%M' )
4753
4854 return processed_df
4955
5056
def run_glucose_simulation(
    start_time=None,
    simulation_days=7,
    patient_names=None,
    seeds=None,
    cgm_name="Dexcom",
    insulin_pump_name="Cozmo",
    parallel=True,
):
    """
    Run one simulation per patient, each with its own seed, and return the output directory.

    Every patient is simulated in a separate ``simulate`` call so that each one
    can use a different seed for both the meal scenario and the CGM sensor.
    Side-product files of the simulation (names containing ``CVGA``,
    ``risk_trace`` or ``performance``, and ``.png`` files) are deleted from the
    result directory afterwards.

    Parameters
    ----------
    start_time (pd.Timestamp, optional): Start time for simulation. Defaults to '2024-01-01 00:00:00'.
    simulation_days (int, optional): Duration of simulation in days. Defaults to 7.
    patient_names (list, optional): Patient IDs to simulate. Defaults to ['adult#001'].
    seeds (list or dict, optional): One seed per patient. A list is matched to
        ``patient_names`` by position; a dict is looked up by patient name.
        When None, a random seed in [1, 1000] is drawn for every patient and
        the drawn seeds are printed so the run can be reproduced.
    cgm_name (str, optional): Name of the cgm device. Defaults to "Dexcom".
    insulin_pump_name (str, optional): Name of the insulin pump device. Defaults to "Cozmo".
    parallel (bool, optional): Forwarded to ``simulate``. Defaults to True.

    Returns
    -------
    str: Path of the directory the simulation results were saved to.

    Raises
    ------
    ValueError: If ``seeds`` is given and its length differs from ``patient_names``.
    """
    # Resolve the default first so the length check below never sees None.
    if patient_names is None:
        patient_names = ['adult#001']

    # `is not None` (rather than truthiness) so an empty `seeds` is rejected
    # here instead of failing later with an IndexError.
    if seeds is not None and len(patient_names) != len(seeds):
        raise ValueError(
            f"Length mismatch: patient_names has {len(patient_names)} elements while seeds has {len(seeds)} elements. Both lists must have the same length.")

    if start_time is None:
        start_time = pd.Timestamp('2024-01-01 00:00:00')

    # Set up result directory
    project_root = get_root_dir()
    result_dir = os.path.join(project_root, 'meal_identification', 'data', 'sim')
    os.makedirs(result_dir, exist_ok=True)

    # Create a controller
    controller = BBController()

    # Set up simulation time
    sim_time = pd.Timedelta(days=simulation_days)

    # Generate a random seed for each patient for a better outcome.
    # Tradeoff is that each patient is simulated in its own call, so patients
    # are no longer simulated in parallel with each other; this is not
    # intended to be run very regularly.
    rand_seeds = []
    for idx, patient in enumerate(patient_names):
        if seeds is None:
            seed = random.randint(1, 1000)
            # Keep track of random seed for each patient
            rand_seeds.append(seed)
        elif isinstance(seeds, dict):
            # Seeds keyed by patient name
            seed = seeds[patient]
        else:
            # Seeds given as a sequence: match by position, not by name
            seed = seeds[idx]

        scenario = RandomScenario(
            start_time=start_time,
            seed=seed,
        )

        # Run simulation for this single patient
        simulate(
            sim_time=sim_time,
            scenario=scenario,
            controller=controller,
            start_time=start_time,
            save_path=result_dir,
            cgm_name=cgm_name,
            cgm_seed=seed,
            insulin_pump_name=insulin_pump_name,
            animate=False,
            parallel=parallel,
            patient_names=[patient],
        )

    if rand_seeds:
        print("Random seeds: ", rand_seeds)

    # Remove side products from the simulation
    side_product_markers = ('CVGA', 'risk_trace', 'performance')
    for name in os.listdir(result_dir):
        is_side_product = any(marker in name for marker in side_product_markers)
        if is_side_product or name.endswith('.png'):
            os.remove(os.path.join(result_dir, name))

    return result_dir
110128
111129
112130def process_sim_data (simulation_days , naming ):
113131 """
114- Process all patient CSV files in the sim directory and output them to data/raw.
132+ Process all patient CSV files in data/sim and save the results to data/raw.
115133
116134 Returns:
117135 None: each processed DataFrame is written to a CSV file; nothing is returned
@@ -125,9 +143,9 @@ def process_sim_data(simulation_days, naming):
125143 csv_files = [f for f in os .listdir (sim_dir ) if f .endswith ('.csv' )]
126144
127145 # Dictionary to store processed data for each patient
128- processed_data = {}
146+ os . makedirs ( processed_dir , exist_ok = True )
129147
130- for file in csv_files :
148+ for idx , file in enumerate ( csv_files ) :
131149 # Skip CVGA_stats.csv and risk_trace.csv
132150 if ('CVGA' in file ) or ('risk_trace' in file ) or ('performance' in file ):
133151 continue
@@ -142,63 +160,50 @@ def process_sim_data(simulation_days, naming):
142160 # Process the data
143161 processed_df = process_simulated_data (df )
144162
163+ # Add id for each patient
164+ processed_df ['id' ] = idx
165+
145166 # Create new filename (first 3 + last 3 characters before .csv)
146167 base_name = file .replace ('.csv' , '' )
147168 short_name = f"{ base_name [:3 ]} { base_name [- 3 :]} "
148- timestamp = datetime .today ()
149- to = timestamp + timedelta (days = simulation_days )
150- start_date = timestamp .strftime ('%Y-%m-%d' )
169+ now = datetime .today ()
170+ to = now + timedelta (days = simulation_days )
171+ start_date = now .strftime ('%Y-%m-%d' )
151172 end_date = to .strftime ('%Y-%m-%d' )
152173 file = f"{ short_name } _{ naming ['cgm_name' ]} _{ naming ['insulin_pump_name' ]} _{ start_date } _{ end_date } .csv"
153174
154- # Store in dictionary
155- processed_data [ file ] = processed_df
156-
157- print (f"Successfully processed { file } " )
175+ # Save the files
176+ output_file = os . path . join ( processed_dir , file )
177+ processed_df . to_csv ( output_file )
178+ print (f"Successfully processed and saved { file } " )
158179
159180 except Exception as e :
160181 print (f"Error processing { file } : { str (e )} " )
161182
162- # Save processed data
163- os .makedirs (processed_dir , exist_ok = True )
164- for file , df in processed_data .items ():
165- output_file = os .path .join (processed_dir , file )
166- df .to_csv (output_file )
167- print (f"Saved processed data for { file } " )
168-
169- return processed_data
170-
171183
172184def generate_simulated_data (
173185 start_time = None ,
174186 simulation_days = 7 ,
175- scenario_type = 'random' ,
176- custom_meal_schedule = None ,
177187 patient_names = None ,
188+ seeds = None ,
178189 cgm_name = "Dexcom" ,
179190 insulin_pump_name = "Cozmo" ,
180- global_seed = 123 ,
181- animate = False ,
182191 parallel = True ,
183192):
184193 """
185194 Run a glucose simulation with specified parameters and output to data/raw.
186- Animate and parallel can not be set to True at the same time for Mac. Not sure about Windows and Linux
187- General data flow: Sim -> Raw
195+ General data flow:
196+ 1. Generate simulated data in `data/sim`
197+ 2. Process data in `data/sim` to `data/raw`
188198
189199 Parameters
190200 ----------
191201 start_time (pd.Timestamp, optional): Start time for simulation. Defaults to '2024-01-01 00:00:00'.
192202 simulation_days (int, optional): Duration of simulation in days. Defaults to 7.
193- scenario_type (str, optional): Type of scenario
194- - 'random' | 'custom'. Defaults to 'random'.
195- custom_meal_schedule (list, optional): List of tuples (hour, carbs) for custom scenario.
196203 cgm_name (str, optional): Name of the cgm device.
197204 - "Dexcom" | "GuardianRT" | "Navigator". Defaults to "Dexcom".
198205 insulin_pump_name (str, optional): Name of the insulin pump device.
199206 - "Cozmo" | "Insulet". Defaults to "Cozmo".
200- global_seed (int, optional): Random seed for reproducibility. Defaults to 123.
201- animate (bool, optional): Whether to animate the simulation. Defaults to False.
202207 parallel (bool, optional): Whether to run simulations in parallel. Defaults to True.
203208 patient_names (list, optional): List of patient IDs to simulate.
204209 - patient_names can be from adult#001 ~ adult#010, adolescent#001 ~ adolescent#010 and child#001 ~ child#010. Default to ["adult#001"].
@@ -210,16 +215,15 @@ def generate_simulated_data(
210215 run_glucose_simulation (
211216 start_time = start_time ,
212217 simulation_days = simulation_days ,
213- scenario_type = scenario_type ,
214- custom_meal_schedule = custom_meal_schedule ,
215218 patient_names = patient_names ,
219+ seeds = seeds ,
216220 cgm_name = cgm_name ,
217221 insulin_pump_name = insulin_pump_name ,
218- global_seed = global_seed ,
219- animate = animate ,
220222 parallel = parallel ,
221223 )
222- process_sim_data (simulation_days = simulation_days , naming = {'cgm_name' : cgm_name , 'insulin_pump_name' : insulin_pump_name })
224+ process_sim_data (simulation_days = simulation_days ,
225+ naming = {'cgm_name' : cgm_name , 'insulin_pump_name' : insulin_pump_name })
226+
223227
224228if __name__ == '__main__' :
225229 # Example usage
0 commit comments