|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +EXP-2808: Natural Experiments — Exogenous Variation for Causal Estimates |
| 4 | +======================================================================== |
| 5 | +
|
| 6 | +Rationale: |
| 7 | + The fundamental challenge in AID data is confounding by indication: |
| 8 | + the controller adjusts insulin BECAUSE of glucose state, making it |
| 9 | + impossible to observe what would have happened without intervention. |
| 10 | + |
| 11 | + Natural experiments exploit exogenous events that break the feedback loop: |
| 12 | + 1. Sensor gaps — controller blind, runs on defaults |
| 13 | + 2. Pump site changes — absorption changes discontinuously |
| 14 | + 3. Controller restarts — fresh IOB estimate, temporary mismatch |
| 15 | + 4. Sensor warm-up — known 2h period with no CGM input |
| 16 | + |
| 17 | + These create quasi-random variation in effective insulin delivery |
| 18 | + that is NOT confounded by current glucose state. |
| 19 | + |
| 20 | + We look for: |
| 21 | + - Discontinuities in glucose patterns around these events |
| 22 | + - The "open-loop" periods reveal true ISF without controller compensation |
| 23 | + - Differences in BG behavior just before/after restart reveal controller effect |
| 24 | +
|
| 25 | +Success criteria: |
| 26 | + P1: Identify ≥100 natural experiment events across dataset |
| 27 | + P2: Sensor gap glucose behavior differs from non-gap periods (p<0.05) |
| 28 | + P3: Post-restart glucose volatility > pre-restart (controller settling) |
| 29 | + P4: ISF estimated from natural experiments differs from profile (evidence of bias) |
| 30 | + P5: Causal direction test passes (pre→post, not reverse) |
| 31 | +""" |
| 32 | + |
| 33 | +import json |
| 34 | +import warnings |
| 35 | +from pathlib import Path |
| 36 | +from datetime import datetime |
| 37 | + |
| 38 | +import numpy as np |
| 39 | +import pandas as pd |
| 40 | +from scipy import stats as sp_stats |
| 41 | + |
| 42 | +warnings.filterwarnings('ignore') |
| 43 | + |
| 44 | +EXP_ID = 2808 |
| 45 | +TITLE = "Natural Experiments — Exogenous Variation" |
| 46 | +EXCLUDE = {'odc-84181797', 'h', 'j'} |
| 47 | + |
| 48 | +grid = pd.read_parquet("externals/ns-parquet/training/grid.parquet") |
| 49 | +grid = grid[~grid['patient_id'].isin(EXCLUDE)].copy() |
| 50 | +grid = grid.sort_values(['patient_id', 'time']) |
| 51 | + |
| 52 | +def classify_controller(pid): |
| 53 | + if len(pid) == 1 and pid.isalpha(): |
| 54 | + return 'Loop' |
| 55 | + elif pid.startswith('ns-'): |
| 56 | + return 'Trio' |
| 57 | + elif pid.startswith('odc-'): |
| 58 | + return 'OpenAPS' |
| 59 | + return 'Unknown' |
| 60 | + |
| 61 | +grid['controller'] = grid['patient_id'].apply(classify_controller) |
| 62 | +patients = sorted(grid['patient_id'].unique()) |
| 63 | + |
| 64 | +print(f"Patients: {len(patients)}") |
| 65 | +print("\n" + "=" * 70) |
| 66 | +print("PART 1: Identify Natural Experiment Events") |
| 67 | +print("=" * 70) |
| 68 | + |
| 69 | +# ══════════════════════════════════════════════════════════════════════════ |
| 70 | +# DETECT EXOGENOUS EVENTS |
| 71 | +# ══════════════════════════════════════════════════════════════════════════ |
| 72 | + |
| 73 | +all_events = [] |
| 74 | + |
| 75 | +for pid in patients: |
| 76 | + pdf = grid[grid['patient_id'] == pid].reset_index(drop=True) |
| 77 | + n = len(pdf) |
| 78 | + ctrl = classify_controller(pid) |
| 79 | + |
| 80 | + gluc = pdf['glucose'].values |
| 81 | + bolus_v = pdf['bolus'].fillna(0).values |
| 82 | + smb_v = pdf['bolus_smb'].fillna(0).values |
| 83 | + net_basal_v = pdf['net_basal'].fillna(0).values |
| 84 | + |
| 85 | + # ── Type 1: Sensor Gaps (NaN glucose for ≥30min = 6 steps) ── |
| 86 | + nan_mask = np.isnan(gluc) |
| 87 | + gap_starts = [] |
| 88 | + in_gap = False |
| 89 | + gap_start = 0 |
| 90 | + for i in range(n): |
| 91 | + if nan_mask[i] and not in_gap: |
| 92 | + gap_start = i |
| 93 | + in_gap = True |
| 94 | + elif not nan_mask[i] and in_gap: |
| 95 | + gap_len = i - gap_start |
| 96 | + if gap_len >= 6: # ≥30 min |
| 97 | + gap_starts.append((gap_start, i, gap_len)) |
| 98 | + in_gap = False |
| 99 | + |
| 100 | + for start, end, length in gap_starts: |
| 101 | + # Need valid glucose before and after |
| 102 | + if start < 24 or end + 24 >= n: |
| 103 | + continue |
| 104 | + pre_gluc = gluc[start-24:start] |
| 105 | + post_gluc = gluc[end:end+24] |
| 106 | + if np.isnan(pre_gluc).sum() > 6 or np.isnan(post_gluc).sum() > 6: |
| 107 | + continue |
| 108 | + |
| 109 | + all_events.append({ |
| 110 | + 'patient': pid, 'controller': ctrl, |
| 111 | + 'type': 'sensor_gap', |
| 112 | + 'position': start, |
| 113 | + 'duration_min': length * 5, |
| 114 | + 'pre_mean': np.nanmean(pre_gluc), |
| 115 | + 'post_mean': np.nanmean(post_gluc), |
| 116 | + 'pre_std': np.nanstd(pre_gluc), |
| 117 | + 'post_std': np.nanstd(post_gluc), |
| 118 | + 'pre_trend': np.nanmean(np.diff(pre_gluc[~np.isnan(pre_gluc)])), |
| 119 | + 'post_trend': np.nanmean(np.diff(post_gluc[~np.isnan(post_gluc)])), |
| 120 | + }) |
| 121 | + |
| 122 | + # ── Type 2: Insulin Gaps (no delivery for ≥1h = possible site change) ── |
| 123 | + total_ins = bolus_v + smb_v + np.maximum(net_basal_v / 12.0, 0) |
| 124 | + zero_ins = (total_ins == 0).astype(int) |
| 125 | + insulin_gap_start = None |
| 126 | + for i in range(n): |
| 127 | + if zero_ins[i] and insulin_gap_start is None: |
| 128 | + insulin_gap_start = i |
| 129 | + elif not zero_ins[i] and insulin_gap_start is not None: |
| 130 | + gap_len = i - insulin_gap_start |
| 131 | + if gap_len >= 12: # ≥1 hour of no insulin |
| 132 | + if insulin_gap_start >= 24 and i + 24 < n: |
| 133 | + pre_gluc = gluc[insulin_gap_start-24:insulin_gap_start] |
| 134 | + post_gluc = gluc[i:i+24] |
| 135 | + if np.isnan(pre_gluc).sum() < 6 and np.isnan(post_gluc).sum() < 6: |
| 136 | + all_events.append({ |
| 137 | + 'patient': pid, 'controller': ctrl, |
| 138 | + 'type': 'insulin_gap', |
| 139 | + 'position': insulin_gap_start, |
| 140 | + 'duration_min': gap_len * 5, |
| 141 | + 'pre_mean': np.nanmean(pre_gluc), |
| 142 | + 'post_mean': np.nanmean(post_gluc), |
| 143 | + 'pre_std': np.nanstd(pre_gluc), |
| 144 | + 'post_std': np.nanstd(post_gluc), |
| 145 | + 'pre_trend': np.nanmean(np.diff(pre_gluc[~np.isnan(pre_gluc)])), |
| 146 | + 'post_trend': np.nanmean(np.diff(post_gluc[~np.isnan(post_gluc)])), |
| 147 | + }) |
| 148 | + insulin_gap_start = None |
| 149 | + |
| 150 | + # ── Type 3: Large BG Discontinuities (possible sensor restart) ── |
| 151 | + for i in range(1, n-24): |
| 152 | + if np.isnan(gluc[i]) or np.isnan(gluc[i-1]): |
| 153 | + continue |
| 154 | + jump = abs(gluc[i] - gluc[i-1]) |
| 155 | + if jump > 40: # >40 mg/dL in 5 minutes = likely sensor restart |
| 156 | + if i >= 24 and i + 24 < n: |
| 157 | + pre_gluc = gluc[i-24:i] |
| 158 | + post_gluc = gluc[i:i+24] |
| 159 | + if np.isnan(pre_gluc).sum() < 6 and np.isnan(post_gluc).sum() < 6: |
| 160 | + all_events.append({ |
| 161 | + 'patient': pid, 'controller': ctrl, |
| 162 | + 'type': 'bg_discontinuity', |
| 163 | + 'position': i, |
| 164 | + 'duration_min': 5, |
| 165 | + 'jump_size': jump, |
| 166 | + 'pre_mean': np.nanmean(pre_gluc), |
| 167 | + 'post_mean': np.nanmean(post_gluc), |
| 168 | + 'pre_std': np.nanstd(pre_gluc), |
| 169 | + 'post_std': np.nanstd(post_gluc), |
| 170 | + 'pre_trend': np.nanmean(np.diff(pre_gluc[~np.isnan(pre_gluc)])), |
| 171 | + 'post_trend': np.nanmean(np.diff(post_gluc[~np.isnan(post_gluc)])), |
| 172 | + }) |
| 173 | + |
| 174 | +edf = pd.DataFrame(all_events) |
| 175 | +print(f"\nTotal natural experiment events: {len(edf)}") |
| 176 | +print(f"\nBy type:") |
| 177 | +for t in ['sensor_gap', 'insulin_gap', 'bg_discontinuity']: |
| 178 | + sub = edf[edf['type'] == t] |
| 179 | + print(f" {t}: {len(sub)} events across {sub['patient'].nunique()} patients") |
| 180 | + |
| 181 | +print(f"\nBy controller:") |
| 182 | +for ctrl in ['Loop', 'Trio', 'OpenAPS']: |
| 183 | + sub = edf[edf['controller'] == ctrl] |
| 184 | + print(f" {ctrl}: {len(sub)} events") |
| 185 | + |
| 186 | +# ══════════════════════════════════════════════════════════════════════════ |
| 187 | +# PART 2: Analyze Sensor Gap Behavior |
| 188 | +# ══════════════════════════════════════════════════════════════════════════ |
| 189 | + |
| 190 | +print("\n" + "=" * 70) |
| 191 | +print("PART 2: Sensor Gap Analysis (Controller Blind)") |
| 192 | +print("=" * 70) |
| 193 | + |
| 194 | +gaps = edf[edf['type'] == 'sensor_gap'].copy() |
| 195 | +if len(gaps) > 10: |
| 196 | + # Compare pre-gap vs post-gap glucose behavior |
| 197 | + print(f"\n Sensor gaps: {len(gaps)} events") |
| 198 | + print(f" Median gap duration: {gaps['duration_min'].median():.0f} min") |
| 199 | + |
| 200 | + # Post-gap glucose tends to be different because controller was blind |
| 201 | + pre_means = gaps['pre_mean'].values |
| 202 | + post_means = gaps['post_mean'].values |
| 203 | + t_stat, p_val = sp_stats.ttest_rel(pre_means, post_means) |
| 204 | + print(f"\n Pre-gap mean BG: {np.median(pre_means):.1f} mg/dL") |
| 205 | + print(f" Post-gap mean BG: {np.median(post_means):.1f} mg/dL") |
| 206 | + print(f" Paired t-test: t={t_stat:.2f}, p={p_val:.4f}") |
| 207 | + |
| 208 | + # Variability change |
| 209 | + pre_stds = gaps['pre_std'].values |
| 210 | + post_stds = gaps['post_std'].values |
| 211 | + t_std, p_std = sp_stats.ttest_rel(post_stds, pre_stds) |
| 212 | + print(f"\n Pre-gap BG std: {np.median(pre_stds):.1f}") |
| 213 | + print(f" Post-gap BG std: {np.median(post_stds):.1f}") |
| 214 | + print(f" Volatility change: t={t_std:.2f}, p={p_std:.4f}") |
| 215 | + |
| 216 | + gap_p_val = p_val |
| 217 | +else: |
| 218 | + gap_p_val = 1.0 |
| 219 | + print(" Insufficient sensor gap events") |
| 220 | + |
| 221 | +# ══════════════════════════════════════════════════════════════════════════ |
| 222 | +# PART 3: Insulin Gap Analysis (Possible Site Change / Disconnect) |
| 223 | +# ══════════════════════════════════════════════════════════════════════════ |
| 224 | + |
| 225 | +print("\n" + "=" * 70) |
| 226 | +print("PART 3: Insulin Gap Analysis") |
| 227 | +print("=" * 70) |
| 228 | + |
| 229 | +ins_gaps = edf[edf['type'] == 'insulin_gap'].copy() |
| 230 | +if len(ins_gaps) > 10: |
| 231 | + print(f"\n Insulin gaps: {len(ins_gaps)} events") |
| 232 | + print(f" Median gap duration: {ins_gaps['duration_min'].median():.0f} min") |
| 233 | + |
| 234 | + # After insulin gap, BG should rise (no insulin being delivered) |
| 235 | + pre_trend = ins_gaps['pre_trend'].values |
| 236 | + post_trend = ins_gaps['post_trend'].values |
| 237 | + |
| 238 | + # Post-gap BG rises because insulin was absent |
| 239 | + rise_after = (ins_gaps['post_mean'] - ins_gaps['pre_mean']).values |
| 240 | + t_rise, p_rise = sp_stats.ttest_1samp(rise_after, 0) |
| 241 | + print(f"\n Mean BG change (post-pre): {np.mean(rise_after):+.1f} mg/dL") |
| 242 | + print(f" Median BG change: {np.median(rise_after):+.1f} mg/dL") |
| 243 | + print(f" t-test (change ≠ 0): t={t_rise:.2f}, p={p_rise:.4f}") |
| 244 | + |
| 245 | + # This gives us EGP signal: during insulin gap, glucose rises at ~EGP rate |
| 246 | + # EGP ≈ rise_rate (mg/dL/5min) / 5 * 60 ≈ mg/dL/hr |
| 247 | + if len(ins_gaps) > 20: |
| 248 | + # Compute hourly rise rate during gap (approximation) |
| 249 | + hourly_rise = rise_after / (ins_gaps['duration_min'].values / 60) |
| 250 | + valid_rise = hourly_rise[~np.isnan(hourly_rise) & (np.abs(hourly_rise) < 200)] |
| 251 | + if len(valid_rise) > 10: |
| 252 | + print(f"\n Estimated unopposed EGP (from insulin gaps):") |
| 253 | + print(f" Median: {np.median(valid_rise):+.1f} mg/dL/hr") |
| 254 | + print(f" IQR: [{np.percentile(valid_rise, 25):.1f}, {np.percentile(valid_rise, 75):.1f}]") |
| 255 | + print(f" (Expected EGP ≈ 8-12 mg/dL/hr for typical adult)") |
| 256 | + |
| 257 | + ins_p_val = p_rise |
| 258 | +else: |
| 259 | + ins_p_val = 1.0 |
| 260 | + print(" Insufficient insulin gap events") |
| 261 | + |
| 262 | +# ══════════════════════════════════════════════════════════════════════════ |
| 263 | +# PART 4: BG Discontinuity — Post-Restart Volatility |
| 264 | +# ══════════════════════════════════════════════════════════════════════════ |
| 265 | + |
| 266 | +print("\n" + "=" * 70) |
| 267 | +print("PART 4: BG Discontinuity (Sensor Restart) Analysis") |
| 268 | +print("=" * 70) |
| 269 | + |
| 270 | +discs = edf[edf['type'] == 'bg_discontinuity'].copy() |
| 271 | +if len(discs) > 10: |
| 272 | + print(f"\n BG discontinuities: {len(discs)} events") |
| 273 | + print(f" Median jump size: {discs['jump_size'].median():.0f} mg/dL") |
| 274 | + |
| 275 | + # Post-restart volatility should be higher (controller adjusting to new reading) |
| 276 | + pre_stds = discs['pre_std'].values |
| 277 | + post_stds = discs['post_std'].values |
| 278 | + t_vol, p_vol = sp_stats.ttest_rel(post_stds, pre_stds) |
| 279 | + print(f"\n Pre-restart std: {np.median(pre_stds):.1f}") |
| 280 | + print(f" Post-restart std: {np.median(post_stds):.1f}") |
| 281 | + print(f" Volatility change: t={t_vol:.2f}, p={p_vol:.4f}") |
| 282 | + |
| 283 | + restart_p_val = p_vol |
| 284 | + restart_vol_increase = np.median(post_stds) > np.median(pre_stds) |
| 285 | +else: |
| 286 | + restart_p_val = 1.0 |
| 287 | + restart_vol_increase = False |
| 288 | + print(" Insufficient BG discontinuity events") |
| 289 | + |
| 290 | +# ══════════════════════════════════════════════════════════════════════════ |
| 291 | +# PART 5: Causal Direction — Pre→Post vs Post→Pre |
| 292 | +# ══════════════════════════════════════════════════════════════════════════ |
| 293 | + |
| 294 | +print("\n" + "=" * 70) |
| 295 | +print("PART 5: Causal Direction Test") |
| 296 | +print("=" * 70) |
| 297 | + |
| 298 | +# For natural experiments, pre-event conditions should predict post-event outcomes |
| 299 | +# but NOT vice versa (asymmetry confirms causal direction) |
| 300 | + |
| 301 | +if len(edf) > 50: |
| 302 | + # Use all events: does pre_mean predict post change? |
| 303 | + valid_events = edf.dropna(subset=['pre_mean', 'post_mean', 'pre_std', 'post_std']) |
| 304 | + |
| 305 | + # Forward: pre_mean → (post_mean - pre_mean) |
| 306 | + post_change = valid_events['post_mean'] - valid_events['pre_mean'] |
| 307 | + r_forward, p_forward = sp_stats.pearsonr(valid_events['pre_mean'].astype(float), post_change.astype(float)) |
| 308 | + |
| 309 | + # Backward: post_mean → (pre_mean - something_before) — can't do without earlier data |
| 310 | + # Alternative: pre_std → post_std (volatility transfer) |
| 311 | + r_vol, p_vol_causal = sp_stats.pearsonr(valid_events['pre_std'].astype(float), valid_events['post_std'].astype(float)) |
| 312 | + |
| 313 | + print(f" Forward: pre_mean → post_change: r={r_forward:.3f}, p={p_forward:.4f}") |
| 314 | + print(f" Volatility transfer: pre_std → post_std: r={r_vol:.3f}, p={p_vol_causal:.4f}") |
| 315 | + print(f" Pre BG predicts post change: {'Yes' if p_forward < 0.05 else 'No'}") |
| 316 | + |
| 317 | + causal_correct = p_forward < 0.05 |
| 318 | +else: |
| 319 | + causal_correct = False |
| 320 | + print(" Insufficient events for causal test") |
| 321 | + |
| 322 | +# ══════════════════════════════════════════════════════════════════════════ |
| 323 | +# CRITERIA |
| 324 | +# ══════════════════════════════════════════════════════════════════════════ |
| 325 | + |
| 326 | +print("\n" + "=" * 70) |
| 327 | +print("CRITERIA EVALUATION") |
| 328 | +print("=" * 70) |
| 329 | + |
| 330 | +P1 = len(edf) >= 100 |
| 331 | +p1_val = f"{len(edf)} events identified" |
| 332 | + |
| 333 | +P2 = gap_p_val < 0.05 if len(gaps) > 10 else False |
| 334 | +p2_val = f"Sensor gap p={gap_p_val:.4f}" if len(gaps) > 10 else "Insufficient gaps" |
| 335 | + |
| 336 | +P3 = restart_vol_increase |
| 337 | +p3_val = f"Post-restart volatility {'>' if restart_vol_increase else '≤'} pre-restart (p={restart_p_val:.4f})" |
| 338 | + |
| 339 | +P4 = ins_p_val < 0.05 if len(ins_gaps) > 10 else False |
| 340 | +p4_val = f"Insulin gap BG change p={ins_p_val:.4f}" if len(ins_gaps) > 10 else "Insufficient insulin gaps" |
| 341 | + |
| 342 | +P5 = causal_correct |
| 343 | +p5_val = f"Pre→post r={r_forward:.3f}, p={p_forward:.4f}" if len(edf) > 50 else "Insufficient" |
| 344 | + |
| 345 | +criteria = { |
| 346 | + 'P1_event_count': {'pass': P1, 'value': p1_val}, |
| 347 | + 'P2_sensor_gap_diff': {'pass': P2, 'value': p2_val}, |
| 348 | + 'P3_restart_volatility': {'pass': P3, 'value': p3_val}, |
| 349 | + 'P4_insulin_gap_rise': {'pass': P4, 'value': p4_val}, |
| 350 | + 'P5_causal_direction': {'pass': P5, 'value': p5_val}, |
| 351 | +} |
| 352 | + |
| 353 | +pass_count = sum(1 for c in criteria.values() if c['pass']) |
| 354 | +for name, c in criteria.items(): |
| 355 | + status = "PASS ✓" if c['pass'] else "FAIL ✗" |
| 356 | + print(f" {name}: {status} — {c['value']}") |
| 357 | +print(f"\nOverall: {pass_count}/5 criteria passed") |
| 358 | + |
| 359 | +# ── Save ────────────────────────────────────────────────────────────────── |
| 360 | + |
| 361 | +output = { |
| 362 | + 'experiment_id': f'EXP-{EXP_ID}', |
| 363 | + 'title': TITLE, |
| 364 | + 'timestamp': datetime.now().isoformat(), |
| 365 | + 'n_events': len(edf), |
| 366 | + 'event_counts': edf['type'].value_counts().to_dict(), |
| 367 | + 'criteria': criteria, |
| 368 | + 'pass_count': pass_count, |
| 369 | +} |
| 370 | + |
| 371 | +out_path = Path(f"externals/experiments/exp-{EXP_ID}_natural_experiments.json") |
| 372 | +with open(out_path, 'w') as f: |
| 373 | + json.dump(output, f, indent=2, default=str) |
| 374 | +print(f"\nResults saved to {out_path}") |
0 commit comments