1+ import re
12import os
3+ import logging
24import pandas as pd
35from typing import Dict , List
46
7+ logger = logging .getLogger (__name__ )
8+
# Matches a trailing "_ses<digits>.csv" (any letter case) and captures the digits.
_SES_RE = re.compile(r"_ses(\d+)\.csv$", re.IGNORECASE)


def _max_session(dir_path: str) -> int:
    """
    Return the largest session number found among a directory's filenames.

    A filename contributes when it ends in ``_ses<digits>.csv`` (matched
    case-insensitively), e.g. ``subj_wk02_ses07.CSV`` contributes 7. Only the
    directory's immediate entries are inspected.

    Args:
        dir_path: Path of the directory to scan.

    Returns:
        The maximum parsed session number, or 0 when no filename matches or
        when ``dir_path`` does not exist or is not a directory.
    """
    # Keep the try body down to the single call that can raise.
    try:
        names = os.listdir(dir_path)
    except (FileNotFoundError, NotADirectoryError):
        # No usable directory means no sessions were recorded.
        return 0

    return max(
        (
            int(m.group(1))
            for fn in names
            if (m := _SES_RE.search(fn)) is not None
        ),
        default=0,
    )
524
625class Get_Data :
726 """
@@ -69,7 +88,9 @@ def get_meta(self) -> Dict:
6988 def build_master_df (self ) -> pd .DataFrame :
7089 """
7190 Create one row per subject with:
72- subject, sup_n, sup_prop, unsup_n, unsup_den, unsup_prop, unsup_prop_30
91+ subject, sup_n, sup_prop, sup_den, unsup_n, unsup_den, unsup_prop, unsup_prop_30
92+ - 'den' for each side is the largest session index seen in filenames.
93+ - Skip subjects with fewer than 6 unsupervised sessions.
7394 """
7495 sup_subjects = set (self ._list_subjects (self .sup_path ))
7596 unsup_subjects = set (self ._list_subjects (self .unsup_path ))
@@ -83,19 +104,37 @@ def build_master_df(self) -> pd.DataFrame:
83104 sup_n = self ._count_csvs (sup_dir )
84105 unsup_n = self ._count_csvs (unsup_dir )
85106
86- sup_prop = sup_n / 30.0
87- unsup_den = max (unsup_n , 0 ) # how many unsup observations exist (<=30)
88- unsup_prop = (unsup_n / max (unsup_den , 1 )) if unsup_den > 0 else 0.0
89- unsup_prop_30 = unsup_n / 30.0
107+ # Denominator = max session index observed in filenames
108+ sup_den = _max_session (sup_dir )
109+ unsup_den = _max_session (unsup_dir )
110+
111+ logger .debug (
112+ f"Subject { subj } : sup_n={ sup_n } , sup_den={ sup_den } , "
113+ f"unsup_n={ unsup_n } , unsup_den={ unsup_den } "
114+ )
115+
116+ # Skip if fewer than 6 unsupervised sessions
117+ if unsup_n < 6 :
118+ continue
119+
120+ # Guard against zero denominators: if no session index found,
121+ # fall back to n (prevents div-by-zero but keeps proportion meaningful)
122+ sup_den_eff = sup_den if sup_den > 0 else max (sup_n , 1 )
123+ unsup_den_eff = unsup_den if unsup_den > 0 else max (unsup_n , 1 )
124+
125+ sup_prop = sup_n / float (sup_den_eff )
126+ unsup_prop = unsup_n / float (unsup_den_eff )
127+ unsup_prop_30 = unsup_n / 30.0 # keep for reference/comparison if you still want it
90128
91129 rows .append ({
92130 "subject" : subj ,
93131 "sup_n" : sup_n ,
132+ "sup_den" : sup_den , # raw parsed max session
94133 "sup_prop" : sup_prop ,
95134 "unsup_n" : unsup_n ,
96- "unsup_den" : unsup_den ,
97- "unsup_prop" : unsup_prop , # used by Rust CLI as y
98- "unsup_prop_30" : unsup_prop_30 # optional – adherence out of 30 planned
135+ "unsup_den" : unsup_den , # raw parsed max session
136+ "unsup_prop" : unsup_prop , # used by Rust CLI as y
137+ "unsup_prop_30" : unsup_prop_30 ,
99138 })
100139
101140 self .master = pd .DataFrame (rows )
@@ -106,12 +145,20 @@ def save_for_rust(self, out_csv: str = "data.csv") -> str:
106145 Save the minimal schema the Rust CLI expects:
107146 sup_prop (x), unsup_prop (y), unsup_den (m)
108147 """
109- if self .master .empty :
148+ if getattr ( self , "master" , None ) is None or self .master .empty :
110149 self .build_master_df ()
111150 df = self .master [["sup_prop" , "unsup_prop" , "unsup_den" ]].copy ()
112- df .rename (columns = {"sup_prop" : "sup_prop" ,
113- "unsup_prop" : "unsup_prop" ,
114- "unsup_den" : "unsup_den" }, inplace = True )
115151 df .to_csv (out_csv , index = False )
116152 return out_csv
153+ '''
154+ from main import Main
155+ from plot.get_data import Get_Data
156+ import os
157+ main = Main(system="Home")
158+ path = os.path.join(main.base_path, "InterventionStudy", "3-Experiment", "data", "polarhrcsv")
159+ gd = Get_Data(sup_path=os.path.join(path, "Supervised"), unsup_path=os.path.join(path, "Unsupervised"), study="InterventionStudy")
160+ df_master = gd.build_master_df()
161+ gd.save_for_rust("../rust-ols-adherence-cli/data.csv")
162+
117163
164+ '''
0 commit comments