Skip to content

Commit f7e656d

Browse files
Tony911029 and Tony Chan authored
Update simulation scripts (#18)
Co-authored-by: Tony Chan <tony.chan@sciteline.com>
1 parent f6c705a commit f7e656d

File tree

11 files changed

+1648
-93
lines changed

11 files changed

+1648
-93
lines changed

meal_identification/data/external/vpatient_params_complete.csv

Lines changed: 364 additions & 0 deletions
Large diffs are not rendered by default.

meal_identification/data/external/vpatient_params_org.csv

Lines changed: 31 additions & 0 deletions
Large diffs are not rendered by default.

meal_identification/data/external/vpatient_params_sim.csv

Lines changed: 1001 additions & 0 deletions
Large diffs are not rendered by default.
-111 KB
Binary file not shown.
-39.8 KB
Binary file not shown.
-21.4 KB
Binary file not shown.
-30.1 KB
Binary file not shown.

meal_identification/meal_identification/datasets/dataset_generator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,7 @@ def dataset_creator(
115115
The processed DataFrames if `return_data` is True, else None.
116116
"""
117117
if keep_cols is None:
118-
keep_cols = ['date', 'bgl', 'msg_type', 'affects_fob', 'affects_iob',
119-
'dose_units', 'food_g', 'food_glycemic_index']
118+
keep_cols = ['date', 'bgl', 'msg_type', 'dose_units', 'food_g']
120119

121120
# Load data using DatasetTransformer
122121
patient_dfs_dict = load_data(raw_data_path=raw_data_path, keep_cols=keep_cols)

meal_identification/meal_identification/datasets/dataset_glucose_simulator.py

Lines changed: 76 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
import pandas as pd
22
import os
33
from simglucose.simulation.user_interface import simulate
4-
from simglucose.simulation.scenario import CustomScenario
54
from simglucose.simulation.scenario_gen import RandomScenario
65
from simglucose.controller.basal_bolus_ctrller import BBController
76
from meal_identification.datasets.dataset_operations import get_root_dir
87
from datetime import datetime, timedelta
8+
import random
99

1010

1111
def process_simulated_data(df):
1212
"""
1313
Process individual patient's glucose data into project-specific format.
14+
CHO -> food_g
15+
CGM -> bgl
16+
BG -> bgl_real
17+
Time -> date
1418
1519
Parameters
1620
----------
@@ -28,9 +32,6 @@ def process_simulated_data(df):
2832

2933
# Add required columns
3034
processed_df['msg_type'] = ''
31-
processed_df['food_glycemic_index'] = ''
32-
processed_df['affects_iob'] = ''
33-
processed_df['affects_fob'] = ''
3435
processed_df['dose_units'] = ''
3536

3637
# Map CGM to bgl column, Time to date and BG to bgl_real for reference only
@@ -44,74 +45,91 @@ def process_simulated_data(df):
4445
# Drop the CHO column
4546
processed_df = processed_df.drop(columns=['CHO'])
4647

48+
# Round numeric columns to 2 decimals and drop seconds from the date to save some space
49+
processed_df['bgl_real'] = processed_df['bgl_real'].round(2)
50+
processed_df['bgl'] = processed_df['bgl'].round(2)
51+
processed_df['food_g'] = processed_df['food_g'].round(2)
52+
processed_df['date'] = pd.to_datetime(processed_df['date']).dt.strftime('%Y-%m-%d %H:%M')
4753

4854
return processed_df
4955

5056

5157
def run_glucose_simulation(
5258
start_time=None,
5359
simulation_days=7,
54-
scenario_type='random',
55-
custom_meal_schedule=None,
5660
patient_names=None,
61+
seeds=None,
5762
cgm_name="Dexcom",
5863
insulin_pump_name="Cozmo",
59-
global_seed=123,
60-
animate=False,
6164
parallel=True,
6265
):
63-
# Set default values
66+
if seeds and len(patient_names) != len(seeds):
67+
raise ValueError(
68+
f"Length mismatch: patient_names has {len(patient_names)} elements while seeds has {len(seeds)} elements. Both lists must have the same length.")
6469
if start_time is None:
6570
start_time = pd.Timestamp('2024-01-01 00:00:00')
6671
if patient_names is None:
6772
patient_names = ['adult#001']
68-
if custom_meal_schedule is None and scenario_type == 'custom':
69-
custom_meal_schedule = [(1, 20)] # Default meal at hour 1 with 20g carbs
7073

71-
# Create controller
74+
# Set up result directory
75+
project_root = get_root_dir()
76+
result_dir = os.path.join(project_root, 'meal_identification', 'data', 'sim')
77+
os.makedirs(result_dir, exist_ok=True)
78+
79+
# Create a controller
7280
controller = BBController()
7381

7482
# Set up simulation time
7583
sim_time = pd.Timedelta(days=simulation_days)
7684

77-
# Scenario
78-
if scenario_type == 'custom':
79-
scenario = CustomScenario(
85+
# Generate a random seed for each patient for a better outcome
86+
# Tradeoff is that we can no longer parallelize the simulation process
87+
# but this is not intended to be run very regularly
88+
rand_seeds = []
89+
for idx, patient in enumerate(patient_names):
90+
if seeds is None:
91+
seed = random.randint(1, 1000)
92+
# Keep track of random seed for each patient
93+
rand_seeds.append(seed)
94+
else:
95+
# Use provided seeds for reproducibility
96+
seed = seeds[patient]
97+
98+
scenario = RandomScenario(
8099
start_time=start_time,
81-
scenario=custom_meal_schedule
100+
seed=seed
82101
)
83-
else:
84-
scenario = RandomScenario(
102+
103+
# Run simulation
104+
simulate(
105+
sim_time=sim_time,
106+
scenario=scenario,
107+
controller=controller,
85108
start_time=start_time,
86-
seed=global_seed
109+
save_path=result_dir,
110+
cgm_name=cgm_name,
111+
cgm_seed=seed,
112+
insulin_pump_name=insulin_pump_name,
113+
animate=False,
114+
parallel=parallel,
115+
patient_names=[patient],
87116
)
88117

89-
# Set up result directory
90-
project_root = get_root_dir()
91-
result_dir = os.path.join(project_root, 'meal_identification', 'data', 'sim')
92-
os.makedirs(result_dir, exist_ok=True)
118+
if rand_seeds:
119+
print("Random seeds: ", rand_seeds)
93120

94-
# Run simulation
95-
simulate(
96-
sim_time=sim_time,
97-
scenario=scenario,
98-
controller=controller,
99-
start_time=start_time,
100-
save_path=result_dir,
101-
cgm_name=cgm_name,
102-
cgm_seed=global_seed,
103-
insulin_pump_name=insulin_pump_name,
104-
animate=animate,
105-
parallel=parallel,
106-
patient_names=patient_names,
107-
)
121+
# Remove side products from the simulation
122+
for file in os.listdir(result_dir):
123+
if 'CVGA' in file or 'risk_trace' in file or 'performance' in file or file.endswith('.png'):
124+
file_path = os.path.join(result_dir, file)
125+
os.remove(file_path)
108126

109127
return result_dir
110128

111129

112130
def process_sim_data(simulation_days, naming):
113131
"""
114-
Process all patient CSV files in the sim directory and output them to data/raw.
132+
Process all patient CSV files in data/sim and save them to data/raw.
115133
116134
Returns:
117135
dict: Dictionary with patient IDs as keys and processed DataFrames as values
@@ -125,9 +143,9 @@ def process_sim_data(simulation_days, naming):
125143
csv_files = [f for f in os.listdir(sim_dir) if f.endswith('.csv')]
126144

127145
# Dictionary to store processed data for each patient
128-
processed_data = {}
146+
os.makedirs(processed_dir, exist_ok=True)
129147

130-
for file in csv_files:
148+
for idx, file in enumerate(csv_files):
131149
# Skip simulator side products (CVGA stats, risk trace and performance files)
132150
if ('CVGA' in file) or ('risk_trace' in file) or ('performance' in file):
133151
continue
@@ -142,63 +160,50 @@ def process_sim_data(simulation_days, naming):
142160
# Process the data
143161
processed_df = process_simulated_data(df)
144162

163+
# Add id for each patient
164+
processed_df['id'] = idx
165+
145166
# Create new filename (first 3 + last 3 characters before .csv)
146167
base_name = file.replace('.csv', '')
147168
short_name = f"{base_name[:3]}{base_name[-3:]}"
148-
timestamp = datetime.today()
149-
to = timestamp + timedelta(days=simulation_days)
150-
start_date = timestamp.strftime('%Y-%m-%d')
169+
now = datetime.today()
170+
to = now + timedelta(days=simulation_days)
171+
start_date = now.strftime('%Y-%m-%d')
151172
end_date = to.strftime('%Y-%m-%d')
152173
file = f"{short_name}_{naming['cgm_name']}_{naming['insulin_pump_name']}_{start_date}_{end_date}.csv"
153174

154-
# Store in dictionary
155-
processed_data[file] = processed_df
156-
157-
print(f"Successfully processed {file}")
175+
# Save the files
176+
output_file = os.path.join(processed_dir, file)
177+
processed_df.to_csv(output_file)
178+
print(f"Successfully processed and saved {file}")
158179

159180
except Exception as e:
160181
print(f"Error processing {file}: {str(e)}")
161182

162-
# Save processed data
163-
os.makedirs(processed_dir, exist_ok=True)
164-
for file, df in processed_data.items():
165-
output_file = os.path.join(processed_dir, file)
166-
df.to_csv(output_file)
167-
print(f"Saved processed data for {file}")
168-
169-
return processed_data
170-
171183

172184
def generate_simulated_data(
173185
start_time=None,
174186
simulation_days=7,
175-
scenario_type='random',
176-
custom_meal_schedule=None,
177187
patient_names=None,
188+
seeds=None,
178189
cgm_name="Dexcom",
179190
insulin_pump_name="Cozmo",
180-
global_seed=123,
181-
animate=False,
182191
parallel=True,
183192
):
184193
"""
185194
Run a glucose simulation with specified parameters and output to data/raw.
186-
Animate and parallel can not be set to True at the same time for Mac. Not sure about Windows and Linux
187-
General data flow: Sim -> Raw
195+
General data flow:
196+
1. Generate simulated data into `data/sim`
197+
2. Process data in `data/sim` to `data/raw`
188198
189199
Parameters
190200
----------
191201
start_time (pd.Timestamp, optional): Start time for simulation. Defaults to '2024-01-01 00:00:00'.
192202
simulation_days (int, optional): Duration of simulation in days. Defaults to 7.
193-
scenario_type (str, optional): Type of scenario
194-
- 'random' | 'custom'. Defaults to 'random'.
195-
custom_meal_schedule (list, optional): List of tuples (hour, carbs) for custom scenario.
196203
cgm_name (str, optional): Name of the cgm device.
197204
- "Dexcom" | "GuardianRT" | "Navigator". Defaults to "Dexcom".
198205
insulin_pump_name (str, optional): Name of the insulin pump device.
199206
- "Cozmo" | "Insulet". Defaults to "Cozmo".
200-
global_seed (int, optional): Random seed for reproducibility. Defaults to 123.
201-
animate (bool, optional): Whether to animate the simulation. Defaults to False.
202207
parallel (bool, optional): Whether to run simulations in parallel. Defaults to True.
203208
patient_names (list, optional): List of patient IDs to simulate.
204209
- patient_names can range over adult#001 ~ adult#010, adolescent#001 ~ adolescent#010 and child#001 ~ child#010. Defaults to ["adult#001"].
@@ -210,16 +215,15 @@ def generate_simulated_data(
210215
run_glucose_simulation(
211216
start_time=start_time,
212217
simulation_days=simulation_days,
213-
scenario_type=scenario_type,
214-
custom_meal_schedule=custom_meal_schedule,
215218
patient_names=patient_names,
219+
seeds=seeds,
216220
cgm_name=cgm_name,
217221
insulin_pump_name=insulin_pump_name,
218-
global_seed=global_seed,
219-
animate=animate,
220222
parallel=parallel,
221223
)
222-
process_sim_data(simulation_days=simulation_days, naming={'cgm_name': cgm_name, 'insulin_pump_name': insulin_pump_name})
224+
process_sim_data(simulation_days=simulation_days,
225+
naming={'cgm_name': cgm_name, 'insulin_pump_name': insulin_pump_name})
226+
223227

224228
if __name__ == '__main__':
225229
# Example usage

0 commit comments

Comments
 (0)