Skip to content

Commit cee29b0

Browse files
author
Zak Gilliam
committed
rust ols model - qc files - plots in JS with d3
Implemented a Rust OLS model that consumes outputs from the Python pipeline. The Python pipeline now creates a proper output file and generates the data needed for plots. `docs/meta_plot` now includes a d3 visualization for the OLS model.
1 parent d6fea20 commit cee29b0

File tree

17 files changed

+1386
-127
lines changed

17 files changed

+1386
-127
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
/nix/store/myzj19ncjjr77x0jsr2f2wjqxvxvnq8k-nix-shell-env
1+
/nix/store/lx87nq6aqzjql6c042ngigmabn552yhg-nix-shell-env

.direnv/flake-profile-a5d5b61aa8a61b7d9d765e1daf971a9a578f1cfa.rc

Lines changed: 201 additions & 26 deletions
Large diffs are not rendered by default.

code/main.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,35 +41,43 @@ def main(self):
4141
from util.hr.extract_hr import extract_hr
4242
from util.zone.extract_zones import extract_zones
4343
from qc.sup import QC_Sup
44-
for project in ["InterventionStudy", "ObservationalStudy"]:
45-
project_path = os.path.join(self.base_path, project, "3-Experiment", "data", "polarhrcsv")
46-
if os.path.exists(project_path):
47-
for session in ["Supervised", "Unsupervised"]:
48-
session_path = os.path.join(project_path, session)
49-
logging.debug(f"Processing session: {session_path}")
50-
if os.path.exists(session_path):
51-
# return the files dict that contains base_path and list of files for each base_path
52-
files = get_files(session_path)
53-
# extract hr from each file
54-
for subject, subject_files in files.items():
55-
for file in subject_files:
56-
if file.lower().endswith('.csv'):
57-
hr = extract_hr(subject_files)
58-
zones = extract_zones(self.zone_path, subject)
59-
err = QC_Sup(hr, zones).main()
60-
61-
if subject not in err_master:
62-
# first time: create a list with this one error
63-
err_master[subject] = [[file,err]]
64-
else:
65-
# append to the existing list
66-
err_master[subject].append([file,err])
44+
project_path = os.path.join(self.base_path, "InterventionStudy", "3-Experiment", "data", "polarhrcsv")
45+
if os.path.exists(project_path):
46+
for session in ["Supervised", "Unsupervised"]:
47+
session_path = os.path.join(project_path, session)
48+
logging.debug(f"Processing session: {session_path}")
49+
if os.path.exists(session_path):
50+
# return the files dict that contains base_path and list of files for each base_path
51+
files = get_files(session_path)
52+
# extract hr from each file
53+
for subject, subject_files in files.items():
54+
for file in subject_files:
55+
if file.lower().endswith('.csv'):
56+
hr = extract_hr(subject_files)
57+
zones = extract_zones(self.zone_path, subject)
58+
err = QC_Sup(hr, zones).main()
59+
60+
if subject not in err_master:
61+
# first time: create a list with this one error
62+
err_master[subject] = [[file,err]]
63+
else:
64+
# append to the existing list
65+
err_master[subject].append([file,err])
6766
err_master = {
6867
subject: [e for e in errs if e]
6968
for subject, errs in err_master.items()
7069
}
7170
from qc.save_qc import save_qc
7271
save_qc(err_master, self.out_path)
72+
from plot.get_data import Get_Data
73+
path = os.path.join(self.base_path, "InterventionStudy", "3-Experiment", "data", "polarhrcsv")
74+
gd = Get_Data(sup_path=os.path.join(path, "Supervised"), unsup_path=os.path.join(path, "Unsupervised"), study="InterventionStudy")
75+
meta = gd.get_meta()
76+
df_master = gd.build_master_df()
77+
gd.save_for_rust("../rust-ols-adherence-cli/data.csv")
78+
79+
80+
7381
return err_master
7482

7583

code/plot/avg.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

code/plot/get_data.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import os
2+
import pandas as pd
3+
from typing import Dict, List
4+
5+
6+
class Get_Data:
    """
    Build a per-subject dataset for the OLS/WLS adherence model.

    Columns produced by :meth:`build_master_df`:
      - sup_prop      = (# supervised CSVs) / 30
      - unsup_den     = # unsupervised CSVs actually observed
      - unsup_prop    = unsup_n / max(unsup_den, 1)
      - unsup_prop_30 = unsup_n / 30.0 (adherence out of 30 planned sessions)

    Notes:
      - Each ``*.csv`` file is treated as one completed session.
      - NOTE(review): as written, ``unsup_den`` always equals ``unsup_n``, so
        ``unsup_prop`` is 1.0 whenever any unsupervised session exists and 0.0
        otherwise. If the intended denominator is the number of *planned*
        sessions, use ``unsup_prop_30`` instead — confirm against the Rust CLI
        model before changing the saved schema.
    """

    def __init__(self, sup_path: str, unsup_path: str, study: str = "InterventionStudy"):
        # sup_path / unsup_path: directories containing one sub-directory per subject.
        self.sup_path = sup_path
        self.unsup_path = unsup_path
        self.study = study
        # Filled lazily by build_master_df(); empty until then.
        self.master = pd.DataFrame()

    @staticmethod
    def _list_subjects(path: str) -> List[str]:
        """Return subject sub-directory names of *path*, skipping hidden entries."""
        return [
            d for d in os.listdir(path)
            if not d.startswith(".") and os.path.isdir(os.path.join(path, d))
        ]

    @staticmethod
    def _count_csvs(path: str) -> int:
        """Count non-hidden ``*.csv`` files in *path*; return 0 if *path* is missing."""
        try:
            return sum(
                1 for f in os.listdir(path)
                if f.lower().endswith(".csv") and not f.startswith(".")
            )
        except FileNotFoundError:
            # A subject may exist in only one of the two session folders.
            return 0

    def get_meta(self) -> Dict:
        """
        Summarize session-30 completion per folder.

        Counts how many subjects have session 30 present (by filename
        containing ``'_ses30'``) and how many sessions are missing from the
        30 planned, for both the supervised and unsupervised folders.

        Returns:
            dict with keys ``"sup"`` and ``"unsup"``, each mapping to
            ``{"ses30_count": int, "total_missing": int, "subjects_complete": list[str]}``.
        """
        meta = {
            "sup": {"ses30_count": 0, "total_missing": 0, "subjects_complete": []},
            "unsup": {"ses30_count": 0, "total_missing": 0, "subjects_complete": []}
        }

        for study_path, label in [(self.sup_path, "sup"), (self.unsup_path, "unsup")]:
            for subject in self._list_subjects(study_path):
                subject_path = os.path.join(study_path, subject)
                files = [
                    f for f in os.listdir(subject_path)
                    if f.lower().endswith(".csv") and not f.startswith(".")
                ]

                # Session 30 present? (marker substring in any filename)
                if any("_ses30" in f.lower() for f in files):
                    meta[label]["ses30_count"] += 1
                    meta[label]["subjects_complete"].append(subject)

                # Missing sessions relative to the 30 planned.
                meta[label]["total_missing"] += max(0, 30 - len(files))

        return meta

    def build_master_df(self) -> pd.DataFrame:
        """
        Create one row per subject with:
          subject, sup_n, sup_prop, unsup_n, unsup_den, unsup_prop, unsup_prop_30

        Subjects are the union of those seen in either session folder,
        sorted by name. The result is cached on ``self.master``.
        """
        sup_subjects = set(self._list_subjects(self.sup_path))
        unsup_subjects = set(self._list_subjects(self.unsup_path))
        subjects = sorted(sup_subjects | unsup_subjects)

        rows = []
        for subj in subjects:
            sup_dir = os.path.join(self.sup_path, subj)
            unsup_dir = os.path.join(self.unsup_path, subj)

            sup_n = self._count_csvs(sup_dir)
            unsup_n = self._count_csvs(unsup_dir)

            sup_prop = sup_n / 30.0
            # Counts are never negative, so unsup_den is simply the observed count.
            # NOTE(review): this makes unsup_prop degenerate (0.0 or 1.0) — see class docstring.
            unsup_den = unsup_n
            unsup_prop = (unsup_n / max(unsup_den, 1)) if unsup_den > 0 else 0.0
            unsup_prop_30 = unsup_n / 30.0

            rows.append({
                "subject": subj,
                "sup_n": sup_n,
                "sup_prop": sup_prop,
                "unsup_n": unsup_n,
                "unsup_den": unsup_den,
                "unsup_prop": unsup_prop,   # used by Rust CLI as y
                "unsup_prop_30": unsup_prop_30  # optional – adherence out of 30 planned
            })

        self.master = pd.DataFrame(rows)
        return self.master

    def save_for_rust(self, out_csv: str = "data.csv") -> str:
        """
        Save the minimal schema the Rust CLI expects:
          sup_prop (x), unsup_prop (y), unsup_den (m)

        Builds the master DataFrame first if it has not been built yet.

        Returns:
            The path the CSV was written to.
        """
        if self.master.empty:
            self.build_master_df()
        # Columns already carry the names the Rust CLI expects; just project them.
        # (The previous self-renaming `df.rename` was a no-op and has been removed.)
        df = self.master[["sup_prop", "unsup_prop", "unsup_den"]]
        df.to_csv(out_csv, index=False)
        return out_csv

dev-shells/python.nix

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Nix flake providing a Python development environment with a venv shell hook.
{
  description = "A Nix-flake-based Python development environment";

  inputs.nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1.*.tar.gz";

  outputs = { self, nixpkgs }:
    let
      supportedSystems = [ "x86_64-linux" "aarch64-darwin" ];
      # Apply `f` once per supported system, handing it that system's pkgs set.
      forEachSupportedSystem = f: nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import nixpkgs { inherit system; };
      });

      /*
      * Change this value ({major}.{min}) to
      * update the Python virtual-environment
      * version. When you do this, make sure
      * to delete the `.venv` directory to
      * have the hook rebuild it for the new
      * version, since it won't overwrite an
      * existing one. After this, reload the
      * development shell to rebuild it.
      * You'll see a warning asking you to
      * do this when version mismatches are
      * present. For safety, removal should
      * be a manual step, even if trivial.
      */
      version = "3.13";
    in
    {
      devShells = forEachSupportedSystem ({ pkgs }:
        let
          # "3.13" -> "313", to select the matching `pythonNNN` package attribute.
          concatMajorMinor = v:
            pkgs.lib.pipe v [
              pkgs.lib.versions.splitVersion
              (pkgs.lib.sublist 0 2)
              pkgs.lib.concatStrings
            ];

          python = pkgs."python${concatMajorMinor version}";
        in
        {
          default = pkgs.mkShellNoCC {
            # Directory where venvShellHook creates/activates the virtualenv.
            venvDir = ".venv";

            # Warn (but don't fail) when the existing .venv was built with a
            # different Python version than the one this flake now provides.
            postShellHook = ''
              venvVersionWarn() {
                local venvVersion
                venvVersion="$("$venvDir/bin/python" -c 'import platform; print(platform.python_version())')"

                [[ "$venvVersion" == "${python.version}" ]] && return

                cat <<EOF
              Warning: Python version mismatch: [$venvVersion (venv)] != [${python.version}]
              Delete '$venvDir' and reload to rebuild for version ${python.version}
              EOF
              }

              venvVersionWarn
            '';

            packages = [
              python.pkgs.venvShellHook
              python.pkgs.pip

              # Data manipulation
              python.pkgs.pandas
              python.pkgs.numpy
              python.pkgs.openpyxl

              # Visualization
              python.pkgs.matplotlib
              python.pkgs.seaborn
              python.pkgs.plotly

              # API requests
              python.pkgs.requests
              python.pkgs.httpx

              # Jupyter/IPython for interactive work
              python.pkgs.jupyterlab
              python.pkgs.ipython

              # Scientific computing and YAML
              python.pkgs.scipy
              python.pkgs.pyyaml
              pkgs.git

            ];
          };
        });
    };
}

dev-shells/rust.nix

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Nix flake providing a Rust development environment via oxalica's rust-overlay.
{
  description = "A Nix-flake-based Rust development environment";

  inputs = {
    nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1.*.tar.gz";
    rust-overlay = {
      url = "github:oxalica/rust-overlay";
      # Pin the overlay's nixpkgs to ours so only one nixpkgs is evaluated.
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = { self, nixpkgs, rust-overlay }:
    let
      supportedSystems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
      # Apply `f` once per supported system, with rust-overlay and our own
      # overlay layered onto that system's pkgs.
      forEachSupportedSystem = f: nixpkgs.lib.genAttrs supportedSystems (system: f {
        pkgs = import nixpkgs {
          inherit system;
          overlays = [ rust-overlay.overlays.default self.overlays.default ];
        };
      });
    in
    {
      overlays.default = final: prev: {
        # Toolchain selection: prefer a repo-local rust-toolchain(.toml) pin,
        # otherwise fall back to the latest stable with rust-src + rustfmt.
        rustToolchain =
          let
            rust = prev.rust-bin;
          in
          if builtins.pathExists ./rust-toolchain.toml then
            rust.fromRustupToolchainFile ./rust-toolchain.toml
          else if builtins.pathExists ./rust-toolchain then
            rust.fromRustupToolchainFile ./rust-toolchain
          else
            rust.stable.latest.default.override {
              extensions = [ "rust-src" "rustfmt" ];
            };
      };

      devShells = forEachSupportedSystem ({ pkgs }: {
        default = pkgs.mkShell {
          packages = with pkgs; [
            rustToolchain
            openssl
            pkg-config
            cargo-deny
            cargo-edit
            cargo-watch
            rust-analyzer
          ];

          env = {
            # Required by rust-analyzer
            RUST_SRC_PATH = "${pkgs.rustToolchain}/lib/rustlib/src/rust/library";
          };
        };
      });
    };
}

0 commit comments

Comments
 (0)