Skip to content

Commit cee29b0

Browse files
author
Zak Gilliam
committed
rust ols model - qc files - plots in JS with d3
Implemented a Rust OLS model that consumes outputs from the Python pipeline. The Python pipeline now creates a proper output file and generates the data needed for plots. `docs/meta_plot` now includes a d3 visualization for the OLS model.
1 parent d6fea20 commit cee29b0

File tree

17 files changed

+1386
-127
lines changed

17 files changed

+1386
-127
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
/nix/store/myzj19ncjjr77x0jsr2f2wjqxvxvnq8k-nix-shell-env
1+
/nix/store/lx87nq6aqzjql6c042ngigmabn552yhg-nix-shell-env

.direnv/flake-profile-a5d5b61aa8a61b7d9d765e1daf971a9a578f1cfa.rc

Lines changed: 201 additions & 26 deletions
Large diffs are not rendered by default.

code/main.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,35 +41,43 @@ def main(self):
4141
from util.hr.extract_hr import extract_hr
4242
from util.zone.extract_zones import extract_zones
4343
from qc.sup import QC_Sup
44-
for project in ["InterventionStudy", "ObservationalStudy"]:
45-
project_path = os.path.join(self.base_path, project, "3-Experiment", "data", "polarhrcsv")
46-
if os.path.exists(project_path):
47-
for session in ["Supervised", "Unsupervised"]:
48-
session_path = os.path.join(project_path, session)
49-
logging.debug(f"Processing session: {session_path}")
50-
if os.path.exists(session_path):
51-
# return the files dict that contains base_path and list of files for each base_path
52-
files = get_files(session_path)
53-
# extract hr from each file
54-
for subject, subject_files in files.items():
55-
for file in subject_files:
56-
if file.lower().endswith('.csv'):
57-
hr = extract_hr(subject_files)
58-
zones = extract_zones(self.zone_path, subject)
59-
err = QC_Sup(hr, zones).main()
60-
61-
if subject not in err_master:
62-
# first time: create a list with this one error
63-
err_master[subject] = [[file,err]]
64-
else:
65-
# append to the existing list
66-
err_master[subject].append([file,err])
44+
project_path = os.path.join(self.base_path, "InterventionStudy", "3-Experiment", "data", "polarhrcsv")
45+
if os.path.exists(project_path):
46+
for session in ["Supervised", "Unsupervised"]:
47+
session_path = os.path.join(project_path, session)
48+
logging.debug(f"Processing session: {session_path}")
49+
if os.path.exists(session_path):
50+
# return the files dict that contains base_path and list of files for each base_path
51+
files = get_files(session_path)
52+
# extract hr from each file
53+
for subject, subject_files in files.items():
54+
for file in subject_files:
55+
if file.lower().endswith('.csv'):
56+
hr = extract_hr(subject_files)
57+
zones = extract_zones(self.zone_path, subject)
58+
err = QC_Sup(hr, zones).main()
59+
60+
if subject not in err_master:
61+
# first time: create a list with this one error
62+
err_master[subject] = [[file,err]]
63+
else:
64+
# append to the existing list
65+
err_master[subject].append([file,err])
6766
err_master = {
6867
subject: [e for e in errs if e]
6968
for subject, errs in err_master.items()
7069
}
7170
from qc.save_qc import save_qc
7271
save_qc(err_master, self.out_path)
72+
from plot.get_data import Get_Data
73+
path = os.path.join(self.base_path, "InterventionStudy", "3-Experiment", "data", "polarhrcsv")
74+
gd = Get_Data(sup_path=os.path.join(path, "Supervised"), unsup_path=os.path.join(path, "Unsupervised"), study="InterventionStudy")
75+
meta = gd.get_meta()
76+
df_master = gd.build_master_df()
77+
gd.save_for_rust("../rust-ols-adherence-cli/data.csv")
78+
79+
80+
7381
return err_master
7482

7583

code/plot/avg.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

code/plot/get_data.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import os
2+
import pandas as pd
3+
from typing import Dict, List
4+
5+
6+
class Get_Data:
    """
    Build a per-subject dataset for the OLS/WLS adherence model.

    Columns produced by :meth:`build_master_df`:
      - sup_prop      = (# supervised CSVs) / 30
      - unsup_den     = # unsupervised CSVs actually observed
      - unsup_prop    = unsup_n / max(unsup_den, 1)
      - unsup_prop_30 = unsup_n / 30.0 (adherence out of 30 planned sessions)

    Notes:
      - Each ``*.csv`` file is treated as one completed session.
      - NOTE(review): as written, ``unsup_den`` always equals ``unsup_n``, so
        ``unsup_prop`` is 1.0 whenever any unsupervised session exists and 0.0
        otherwise. If the intended denominator is the number of *planned*
        sessions, use ``unsup_prop_30`` instead — confirm against the Rust CLI
        model before changing the saved schema.
    """

    def __init__(self, sup_path: str, unsup_path: str, study: str = "InterventionStudy"):
        # sup_path / unsup_path: directories containing one sub-directory per subject.
        self.sup_path = sup_path
        self.unsup_path = unsup_path
        self.study = study
        # Filled lazily by build_master_df(); empty until then.
        self.master = pd.DataFrame()

    @staticmethod
    def _list_subjects(path: str) -> List[str]:
        """Return subject sub-directory names of *path*, skipping hidden entries."""
        return [
            d for d in os.listdir(path)
            if not d.startswith(".") and os.path.isdir(os.path.join(path, d))
        ]

    @staticmethod
    def _count_csvs(path: str) -> int:
        """Count non-hidden ``*.csv`` files in *path*; return 0 if *path* is missing."""
        try:
            return sum(
                1 for f in os.listdir(path)
                if f.lower().endswith(".csv") and not f.startswith(".")
            )
        except FileNotFoundError:
            # A subject may exist in only one of the two session folders.
            return 0

    def get_meta(self) -> Dict:
        """
        Summarize session-30 completion per folder.

        Counts how many subjects have session 30 present (by filename
        containing ``'_ses30'``) and how many sessions are missing from the
        30 planned, for both the supervised and unsupervised folders.

        Returns:
            dict with keys ``"sup"`` and ``"unsup"``, each mapping to
            ``{"ses30_count": int, "total_missing": int, "subjects_complete": list[str]}``.
        """
        meta = {
            "sup": {"ses30_count": 0, "total_missing": 0, "subjects_complete": []},
            "unsup": {"ses30_count": 0, "total_missing": 0, "subjects_complete": []}
        }

        for study_path, label in [(self.sup_path, "sup"), (self.unsup_path, "unsup")]:
            for subject in self._list_subjects(study_path):
                subject_path = os.path.join(study_path, subject)
                files = [
                    f for f in os.listdir(subject_path)
                    if f.lower().endswith(".csv") and not f.startswith(".")
                ]

                # Session 30 present? (marker substring in any filename)
                if any("_ses30" in f.lower() for f in files):
                    meta[label]["ses30_count"] += 1
                    meta[label]["subjects_complete"].append(subject)

                # Missing sessions relative to the 30 planned.
                meta[label]["total_missing"] += max(0, 30 - len(files))

        return meta

    def build_master_df(self) -> pd.DataFrame:
        """
        Create one row per subject with:
          subject, sup_n, sup_prop, unsup_n, unsup_den, unsup_prop, unsup_prop_30

        Subjects are the union of those seen in either session folder,
        sorted by name. The result is cached on ``self.master``.
        """
        sup_subjects = set(self._list_subjects(self.sup_path))
        unsup_subjects = set(self._list_subjects(self.unsup_path))
        subjects = sorted(sup_subjects | unsup_subjects)

        rows = []
        for subj in subjects:
            sup_dir = os.path.join(self.sup_path, subj)
            unsup_dir = os.path.join(self.unsup_path, subj)

            sup_n = self._count_csvs(sup_dir)
            unsup_n = self._count_csvs(unsup_dir)

            sup_prop = sup_n / 30.0
            # Counts are never negative, so unsup_den is simply the observed count.
            # NOTE(review): this makes unsup_prop degenerate (0.0 or 1.0) — see class docstring.
            unsup_den = unsup_n
            unsup_prop = (unsup_n / max(unsup_den, 1)) if unsup_den > 0 else 0.0
            unsup_prop_30 = unsup_n / 30.0

            rows.append({
                "subject": subj,
                "sup_n": sup_n,
                "sup_prop": sup_prop,
                "unsup_n": unsup_n,
                "unsup_den": unsup_den,
                "unsup_prop": unsup_prop,   # used by Rust CLI as y
                "unsup_prop_30": unsup_prop_30  # optional – adherence out of 30 planned
            })

        self.master = pd.DataFrame(rows)
        return self.master

    def save_for_rust(self, out_csv: str = "data.csv") -> str:
        """
        Save the minimal schema the Rust CLI expects:
          sup_prop (x), unsup_prop (y), unsup_den (m)

        Builds the master DataFrame first if it has not been built yet.

        Returns:
            The path the CSV was written to.
        """
        if self.master.empty:
            self.build_master_df()
        # Columns already carry the names the Rust CLI expects; just project them.
        # (The previous self-renaming `df.rename` was a no-op and has been removed.)
        df = self.master[["sup_prop", "unsup_prop", "unsup_den"]]
        df.to_csv(out_csv, index=False)
        return out_csv

dev-shells/python.nix

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Nix flake providing a Python development environment with a venv shell hook.
{
  description = "A Nix-flake-based Python development environment";

  inputs.nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1.*.tar.gz";

  outputs = { self, nixpkgs }:
    let
      supportedSystems = [ "x86_64-linux" "aarch64-darwin" ];
      # Apply `f` once per supported system, handing it that system's pkgs set.
      forEachSupportedSystem = f: nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import nixpkgs { inherit system; };
      });

      /*
      * Change this value ({major}.{min}) to
      * update the Python virtual-environment
      * version. When you do this, make sure
      * to delete the `.venv` directory to
      * have the hook rebuild it for the new
      * version, since it won't overwrite an
      * existing one. After this, reload the
      * development shell to rebuild it.
      * You'll see a warning asking you to
      * do this when version mismatches are
      * present. For safety, removal should
      * be a manual step, even if trivial.
      */
      version = "3.13";
    in
    {
      devShells = forEachSupportedSystem ({ pkgs }:
        let
          # "3.13" -> "313", to select the matching `pythonNNN` package attribute.
          concatMajorMinor = v:
            pkgs.lib.pipe v [
              pkgs.lib.versions.splitVersion
              (pkgs.lib.sublist 0 2)
              pkgs.lib.concatStrings
            ];

          python = pkgs."python${concatMajorMinor version}";
        in
        {
          default = pkgs.mkShellNoCC {
            # Directory where venvShellHook creates/activates the virtualenv.
            venvDir = ".venv";

            # Warn (but don't fail) when the existing .venv was built with a
            # different Python version than the one this flake now provides.
            postShellHook = ''
              venvVersionWarn() {
                local venvVersion
                venvVersion="$("$venvDir/bin/python" -c 'import platform; print(platform.python_version())')"

                [[ "$venvVersion" == "${python.version}" ]] && return

                cat <<EOF
              Warning: Python version mismatch: [$venvVersion (venv)] != [${python.version}]
              Delete '$venvDir' and reload to rebuild for version ${python.version}
              EOF
              }

              venvVersionWarn
            '';

            packages = [
              python.pkgs.venvShellHook
              python.pkgs.pip

              # Data manipulation
              python.pkgs.pandas
              python.pkgs.numpy
              python.pkgs.openpyxl

              # Visualization
              python.pkgs.matplotlib
              python.pkgs.seaborn
              python.pkgs.plotly

              # API requests
              python.pkgs.requests
              python.pkgs.httpx

              # Jupyter/IPython for interactive work
              python.pkgs.jupyterlab
              python.pkgs.ipython

              # Scientific computing and YAML
              python.pkgs.scipy
              python.pkgs.pyyaml
              pkgs.git

            ];
          };
        });
    };
}

dev-shells/rust.nix

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Nix flake providing a Rust development environment via oxalica's rust-overlay.
{
  description = "A Nix-flake-based Rust development environment";

  inputs = {
    nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1.*.tar.gz";
    rust-overlay = {
      url = "github:oxalica/rust-overlay";
      # Pin the overlay's nixpkgs to ours so only one nixpkgs is evaluated.
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = { self, nixpkgs, rust-overlay }:
    let
      supportedSystems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
      # Apply `f` once per supported system, with rust-overlay and our own
      # overlay layered onto that system's pkgs.
      forEachSupportedSystem = f: nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import nixpkgs {
          inherit system;
          overlays = [ rust-overlay.overlays.default self.overlays.default ];
        };
      });
    in
    {
      overlays.default = final: prev: {
        # Toolchain selection: prefer a repo-local rust-toolchain(.toml) pin,
        # otherwise fall back to the latest stable with rust-src + rustfmt.
        rustToolchain =
          let
            rust = prev.rust-bin;
          in
          if builtins.pathExists ./rust-toolchain.toml then
            rust.fromRustupToolchainFile ./rust-toolchain.toml
          else if builtins.pathExists ./rust-toolchain then
            rust.fromRustupToolchainFile ./rust-toolchain
          else
            rust.stable.latest.default.override {
              extensions = [ "rust-src" "rustfmt" ];
            };
      };

      devShells = forEachSupportedSystem ({ pkgs }: {
        default = pkgs.mkShell {
          packages = with pkgs; [
            rustToolchain
            openssl
            pkg-config
            cargo-deny
            cargo-edit
            cargo-watch
            rust-analyzer
          ];

          env = {
            # Required by rust-analyzer
            RUST_SRC_PATH = "${pkgs.rustToolchain}/lib/rustlib/src/rust/library";
          };
        };
      });
    };
}

0 commit comments

Comments
 (0)