Commit 9d8145e
"how did everything delete"
1 parent 11963ed

2,420 files changed: +128,775 additions, 0 deletions

.DS_Store

6 KB
Binary file not shown.

.github/workflows/main.yml

Lines changed: 32 additions & 0 deletions

```yaml
name: temp name

on:
  push:
    branches: [ "main" ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Python script
        run: |
          python code/main_handler.py all
      - name: Commit and push changes to dev
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git checkout dev
          git add .
          git commit -m "Automated changes from GitHub Actions"
          git push origin dev
```
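One caveat with the final step: `git commit` exits non-zero when there is nothing to commit, which would fail the job on no-op runs. A guarded variant of that step (a sketch, not part of this commit) might look like:

```yaml
      - name: Commit and push changes to dev
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git checkout dev
          git add .
          # Commit only if the index actually changed
          git diff --cached --quiet || git commit -m "Automated changes from GitHub Actions"
          git push origin dev
```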

.qcrc

Lines changed: 6 additions & 0 deletions

```json
{
    "DATADIR": "./data",
    "RAWDATADIR": "./data/raw"
}
```

(Note: a comma between the two keys is required for this to parse as valid JSON; it was missing in the committed file.)
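`.qcrc` appears to be a small JSON config mapping names to data directories. A minimal sketch of loading it (assuming valid JSON with these two keys; the inline string stands in for the file contents):

```python
import json

# Minimal sketch: parse the .qcrc config (assumed to be plain JSON).
# In the repo this would be read from the .qcrc file instead of a literal.
qcrc_text = """
{
    "DATADIR": "./data",
    "RAWDATADIR": "./data/raw"
}
"""

config = json.loads(qcrc_text)
print(config["DATADIR"])     # ./data
print(config["RAWDATADIR"])  # ./data/raw
```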

README.md

Lines changed: 113 additions & 0 deletions

# Holistic QC rewrite

> A more modular approach to QC that alleviates the headaches of the old pipeline.

## Requirements

- process batches of txt files by converting them into csv
- perform basic QCs
- apply scoring criteria
- plot information on a graph
- save the data in the correct location
- upload the data to git and the server
- add plots and scoring to GitHub Pages

## Plan

- `pull_handler` returns a list of txt files
- `utils` contains commonly used functions, such as converting a txt file to csv
- each domain has its own QC file with different methods for QCing by task
  - takes a list of files as an argument and processes them, returning the usability score and logging any problems

## Tasks

- [x] finish CC algos
- [x] test
- [ ] start WL/DWL algos -> separate class from mem

## Relational Database Design Summary for Clinical Trial Cognitive Data

### Purpose & Scope

- This database will organize and store clinical trial cognitive data.
- Each participant completes 13 cognitive tasks, with two runs each.
- The data will be ingested daily from a prewritten backend.
- The database will integrate with a frontend using Python and Azure.
- Expected data volume: hundreds to thousands of participants.

### Core Entities & Relationships

1. Participants (`participants`)
   - Stores participant identifiers, their assigned study type (observation/intervention), and their site location.
   - Each participant completes 26 runs total (13 tasks × 2 runs).
   - Relationships: linked to `sites` (`site_id`), linked to `study_types` (`study_id`), has many runs.
2. Study Types (`study_types`)
   - Defines whether a participant is in the Intervention or Observation group.
3. Sites (`sites`)
   - Stores the location each participant is from.
   - Explicitly defined in the directory structure.
4. Tasks (`tasks`)
   - Stores the 13 predefined tasks in a static table.
5. Runs (`runs`)
   - Stores each task run per participant (26 runs per participant).
   - Each run is linked to a participant and a task.
   - Can store a timestamp (nullable, extracted from CSVs).
6. Results (`results`)
   - Stores raw cognitive task data extracted from CSV files.
   - CSV contents will be stored directly in the database (not just file paths).
   - Linked to `runs` via `run_id`.
7. Reports (`reports`)
   - Stores 1-2 PNG files per run as binary blobs (not file paths).
   - Linked to `runs` via `run_id`.
   - Has a `missing_png_flag` to track if files are absent.

### Constraints & Data Integrity

- Primary keys (PKs) & foreign keys (FKs):
  - `participant_id` -> primary key in `participants`
  - `task_id` -> primary key in `tasks`
  - `run_id` -> primary key in `runs`; foreign keys link to `participants` & `tasks`
  - `result_id` -> primary key in `results`; foreign key links to `runs`
  - `report_id` -> primary key in `reports`; foreign key links to `runs`
- Data rules & validation:
  - All 13 tasks must be associated with each participant (26 runs total).
  - `missing_png_flag` will track missing PNG files.
  - `csv_data` will be stored as structured data (likely JSON or table format).

### Indexing & Optimization

- Indexes on:
  - `participant_id` (for quick retrieval of participant data)
  - `task_id` (for filtering task-based results)
  - `study_id` (for intervention vs. observation analysis)
  - `site_id` (for location-based analysis)
- Storage considerations:
  - CSV data stored as structured content (JSON or column format).
  - PNG files stored as binary blobs.
- Query optimization:
  - JOINs will be used for participant-level queries.
  - Materialized views can be considered for frequently used summaries.

### Security & Access Control

- Currently a single user, so permissions are simple.
- Future security measures:
  - Row-level security for multiple users.
  - Encryption for sensitive participant records.

### Backup & Recovery

- Daily backups of database storage + binary files.
- Azure Blob Storage or PostgreSQL Large Objects for efficient handling of PNG & CSV files.

Next step: SQL schema implementation.

Would you like the SQL schema to be written for PostgreSQL, MySQL, or another database system?
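The entity layout above can be sketched as an actual schema. The sketch below uses SQLite purely so it is self-contained (the stated production target is PostgreSQL on Azure); table and column names follow the summary, and the `run_number` column with its `CHECK` is an assumption introduced here to distinguish the two runs per task:

```python
import sqlite3

# Illustrative schema for the design summary above. SQLite syntax is used for a
# self-contained sketch; the real deployment would target PostgreSQL.
conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE study_types (study_id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE sites (site_id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE participants (
    participant_id INTEGER PRIMARY KEY,
    study_id INTEGER NOT NULL REFERENCES study_types(study_id),
    site_id INTEGER NOT NULL REFERENCES sites(site_id)
);
CREATE TABLE tasks (task_id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE);
CREATE TABLE runs (
    run_id INTEGER PRIMARY KEY,
    participant_id INTEGER NOT NULL REFERENCES participants(participant_id),
    task_id INTEGER NOT NULL REFERENCES tasks(task_id),
    run_number INTEGER NOT NULL CHECK (run_number IN (1, 2)),  -- assumption
    run_timestamp TEXT,                    -- nullable, extracted from CSVs
    UNIQUE (participant_id, task_id, run_number)
);
CREATE TABLE results (
    result_id INTEGER PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES runs(run_id),
    csv_data TEXT NOT NULL                 -- structured CSV contents (e.g. JSON)
);
CREATE TABLE reports (
    report_id INTEGER PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES runs(run_id),
    png_blob BLOB,                         -- PNG stored as a binary blob
    missing_png_flag INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX idx_runs_participant ON runs(participant_id);
CREATE INDEX idx_runs_task ON runs(task_id);
""")
```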

app/app.py

Lines changed: 66 additions & 0 deletions

```python
import os
from flask import Flask, send_from_directory
from main.utils import construct_master_list


def update_png_paths_and_create_serve_function(app):
    """
    Updates the PNG file paths in MASTER_LIST to use the new served directory
    structure and creates a Flask route to serve the files.
    """
    master_list = app.config["MASTER_LIST"]
    data_folder = app.config["DATA_FOLDER"]

    # Create a new directory structure for serving
    for subject_id, subject_data in master_list.items():
        for task_name, task_data in subject_data.get("tasks", {}).items():
            new_png_paths = []
            for file_path in task_data.get("png_paths", []):
                # Extract relative path: 'subject/task/file'
                relative_path = os.path.relpath(file_path, data_folder)
                new_png_paths.append(f"data/{relative_path}")
            # Update the master list with the new paths
            task_data["png_paths"] = new_png_paths

    # Add a route to serve the updated files
    @app.route("/data/<path:subpath>")
    def serve_data_file(subpath):
        """Serve files from the data directory using the new structure."""
        file_path = os.path.join(data_folder, subpath)
        if not os.path.exists(file_path):
            return f"File not found: {file_path}", 404

        directory, filename = os.path.split(file_path)
        return send_from_directory(directory, filename)


def create_app():
    app = Flask(__name__)
    app.config['DATA_FOLDER'] = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'data'))
    app.config['ALLOWED_EXTENSIONS'] = {'csv', 'txt', 'png'}

    # Ensure the data folder exists
    if not os.path.exists(app.config['DATA_FOLDER']):
        raise FileNotFoundError(f"Data folder not found at {app.config['DATA_FOLDER']}")

    # Construct the master list and store it in the app config
    app.config['MASTER_LIST'] = construct_master_list(app.config['DATA_FOLDER'])

    # Update paths in the master list and add the serve route
    with app.app_context():
        update_png_paths_and_create_serve_function(app)

    # Register blueprints
    from feed_blueprint import feed_print
    from home_blueprint import home_blueprint
    app.register_blueprint(feed_print)
    app.register_blueprint(home_blueprint)

    return app


if __name__ == '__main__':
    # Initialize and run the Flask app
    app = create_app()
    app.run(debug=True)
```
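The path rewrite in `update_png_paths_and_create_serve_function` is just `os.path.relpath` against the data folder, mapping absolute PNG paths to URLs under the `/data` route. A standalone sketch of that step (the paths here are hypothetical):

```python
import os

# Sketch of the path rewrite done in update_png_paths_and_create_serve_function:
# absolute PNG paths under DATA_FOLDER become paths under the served /data route.
data_folder = "/srv/app/data"                        # hypothetical DATA_FOLDER
file_path = "/srv/app/data/subject1/task1/plot.png"  # hypothetical PNG path

relative_path = os.path.relpath(file_path, data_folder)
served_path = f"data/{relative_path}"
print(served_path)  # data/subject1/task1/plot.png
```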

app/db 2.py

Lines changed: 167 additions & 0 deletions

```python
import os
import logging

import psycopg

from main.update_db import DatabaseUtils


# Database connection setup
def connect_to_db(db_name, user, password, host="localhost", port=5432):
    return psycopg.connect(dbname=db_name, user=user, password=password,
                           host=host, port=port)


# Initialize database schema
def initialize_schema(connection):
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                -- Drop existing tables in reverse dependency order.
                DROP TABLE IF EXISTS session CASCADE;
                DROP TABLE IF EXISTS task CASCADE;
                DROP TABLE IF EXISTS subject CASCADE;
                DROP TABLE IF EXISTS site CASCADE;
                DROP TABLE IF EXISTS study CASCADE;

                -- Create table "study"
                CREATE TABLE study (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL UNIQUE
                );

                -- Create table "site"
                CREATE TABLE site (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL,
                    study_id INTEGER NOT NULL,
                    UNIQUE (name, study_id),
                    FOREIGN KEY (study_id) REFERENCES study(id) ON DELETE CASCADE
                );

                -- Create table "subject"
                CREATE TABLE subject (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL,
                    site_id INTEGER NOT NULL,
                    UNIQUE (name, site_id),
                    FOREIGN KEY (site_id) REFERENCES site(id) ON DELETE CASCADE
                );

                -- Create table "task"
                CREATE TABLE task (
                    id SERIAL PRIMARY KEY,
                    name TEXT NOT NULL,
                    subject_id INTEGER NOT NULL,
                    UNIQUE (name, subject_id),
                    FOREIGN KEY (subject_id) REFERENCES subject(id) ON DELETE CASCADE
                );

                -- Create table "session"
                CREATE TABLE session (
                    id SERIAL PRIMARY KEY,
                    session_name TEXT NOT NULL,
                    category INTEGER NOT NULL,
                    csv_path TEXT NOT NULL,
                    task_id INTEGER NOT NULL,
                    date TIMESTAMP,
                    plot_paths TEXT[],
                    FOREIGN KEY (task_id) REFERENCES task(id) ON DELETE CASCADE,
                    UNIQUE (session_name, category, csv_path, task_id)
                );
            """)
        connection.commit()
    except Exception as e:
        logging.error(f"Error initializing schema: {e}")
        connection.rollback()
    # Leave the connection open; the caller owns and closes it.


def _get_or_create_id(cursor, insert_sql, select_sql, params):
    """Insert a row (ignoring conflicts) and return its id, selecting it if it
    already existed. Fixes the original one-liner, which returned a tuple
    instead of the id."""
    cursor.execute(insert_sql, params)
    row = cursor.fetchone()
    if row is None:
        cursor.execute(select_sql, params)
        row = cursor.fetchone()
    return row[0]


# Populate the database from the folder structure
def populate_database(connection, data_folder):
    for study_name in os.listdir(data_folder):
        study_path = os.path.join(data_folder, study_name)
        if not os.path.isdir(study_path):
            continue

        with connection.cursor() as cursor:
            study_id = _get_or_create_id(
                cursor,
                "INSERT INTO study (name) VALUES (%s) ON CONFLICT (name) DO NOTHING RETURNING id;",
                "SELECT id FROM study WHERE name = %s;",
                (study_name,))

        for site_name in os.listdir(study_path):
            site_path = os.path.join(study_path, site_name)
            if not os.path.isdir(site_path):
                continue

            with connection.cursor() as cursor:
                site_id = _get_or_create_id(
                    cursor,
                    "INSERT INTO site (name, study_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                    "SELECT id FROM site WHERE name = %s AND study_id = %s;",
                    (site_name, study_id))

            for subject_name in os.listdir(site_path):
                subject_path = os.path.join(site_path, subject_name)
                if not os.path.isdir(subject_path):
                    continue

                with connection.cursor() as cursor:
                    subject_id = _get_or_create_id(
                        cursor,
                        "INSERT INTO subject (name, site_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                        "SELECT id FROM subject WHERE name = %s AND site_id = %s;",
                        (subject_name, site_id))

                for task_name in os.listdir(subject_path):
                    task_path = os.path.join(subject_path, task_name)
                    if not os.path.isdir(task_path):
                        continue

                    with connection.cursor() as cursor:
                        task_id = _get_or_create_id(
                            cursor,
                            "INSERT INTO task (name, subject_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                            "SELECT id FROM task WHERE name = %s AND subject_id = %s;",
                            (task_name, subject_id))

                    for folder in ["data", "plot"]:
                        folder_path = os.path.join(task_path, folder)
                        if not os.path.exists(folder_path):
                            continue

                        if folder == "data":
                            for file in os.listdir(folder_path):
                                if file.endswith(".csv"):
                                    parts = file.split("_")
                                    session_name = parts[1].split("-")[1]
                                    category = int(parts[2].split("-")[1].split(".")[0])

                                    with connection.cursor() as cursor:
                                        cursor.execute("""
                                            INSERT INTO session (session_name, category, csv_path, task_id)
                                            VALUES (%s, %s, %s, %s)
                                            ON CONFLICT DO NOTHING;
                                        """, (session_name, category,
                                              os.path.join(folder_path, file), task_id))

                        elif folder == "plot":
                            plots = []
                            for file in os.listdir(folder_path):
                                if file.endswith(".png"):
                                    plots.append(os.path.join(folder_path, file))

                            with connection.cursor() as cursor:
                                cursor.execute("""
                                    UPDATE session
                                    SET plot_paths = %s
                                    WHERE task_id = %s;
                                """, (plots, task_id))
    connection.commit()


# Main entry point
if __name__ == "__main__":
    db_name = "boost-beh"
    user = "zakg04"
    # Read the password from the environment; the committed file hardcoded a
    # plaintext credential here, which should be rotated and removed.
    password = os.environ.get("DB_PASSWORD", "")
    data_folder = "../data"
    connection = connect_to_db(db_name, user, password)
    try:
        initialize_schema(connection)
    finally:
        connection.close()
    '''
    util_instance = DatabaseUtils(connection, data_folder)
    util_instance.update_database()
    '''
```
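The session metadata in `populate_database` is recovered purely from each CSV filename via `split`. The exact naming scheme is not shown in this commit, but the parse implies something like `<task>_<key>-<session>_<key>-<category>.csv`; the filename below is a hypothetical example that matches the split logic:

```python
# Sketch of the filename parsing in populate_database. "CC_sess-A1_cat-3.csv"
# is a hypothetical example; the real naming scheme is not in this commit.
file = "CC_sess-A1_cat-3.csv"

parts = file.split("_")                               # ['CC', 'sess-A1', 'cat-3.csv']
session_name = parts[1].split("-")[1]                 # 'A1'
category = int(parts[2].split("-")[1].split(".")[0])  # 3
print(session_name, category)  # A1 3
```

Note that any CSV not following this pattern raises an `IndexError` or `ValueError`, so the real pipeline may want to guard this parse.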
