
Commit b4aa633

Merge pull request #1 from HBClab/dev

Dev

2 parents: 18c490f + 3b6df12

File tree

2,457 files changed: +104,388 additions, -4,229 deletions


.github/workflows/main.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
name: temp name

on:
  push:
    branches: [ "main", "dev" ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Python script
        run: |
          python code/main_handler.py all
      - name: Commit and push changes to dev
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git checkout dev
          git add .
          git commit -m "Automated changes from GitHub Actions"
          git push origin dev

README.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,86 @@
## Relational Database Design Summary for Clinical Trial Cognitive Data

>>Purpose & Scope
• This database will organize and store clinical trial cognitive data.
• Each participant completes 13 cognitive tasks, with two runs each.
• Data will be ingested daily from a prewritten backend.
• The database will integrate with a frontend using Python and Azure.
• Expected data volume: hundreds to thousands of participants.

>>Core Entities & Relationships

1. Participants (participants)
• Stores participant identifiers, their assigned study type (observation/intervention), and their site location.
• Each participant completes 26 runs total (13 tasks × 2 runs).
• Relationships:
  • Linked to sites (site_id)
  • Linked to study_types (study_id)
  • Has many runs

2. Study Types (study_types)
• Defines whether a participant is in the Intervention or Observation group.

3. Sites (sites)
• Stores the location each participant is from.
• Explicitly defined in the directory structure.

4. Tasks (tasks)
• Stores the 13 predefined tasks in a static table.

5. Runs (runs)
• Stores each task run per participant (26 runs per participant).
• Each run is linked to a participant and a task.
• Can store a timestamp (nullable, extracted from CSVs).

6. Results (results)
• Stores raw cognitive task data extracted from CSV files.
• CSV contents are stored directly in the database (not just file paths).
• Linked to runs via run_id.

7. Reports (reports)
• Stores 1-2 PNG files per run as binary blobs (not file paths).
• Linked to runs via run_id.
• Has a missing_png_flag to track if files are absent.

>>Constraints & Data Integrity
• Primary Keys (PKs) & Foreign Keys (FKs):
  • participant_id → primary key in participants
  • task_id → primary key in tasks
  • run_id → primary key in runs; foreign keys link to participants & tasks
  • result_id → primary key in results; foreign key links to runs
  • report_id → primary key in reports; foreign key links to runs
• Data Rules & Validation:
  • All 13 tasks must be associated with each participant (26 runs total).
  • missing_png_flag tracks missing PNG files.
  • csv_data is stored as structured data (likely JSON or table format).

>>Indexing & Optimization
• Indexes on:
  • participant_id (for quick retrieval of participant data)
  • task_id (for filtering task-based results)
  • study_id (for intervention vs. observation analysis)
  • site_id (for location-based analysis)
• Storage Considerations:
  • CSV data stored as structured content (JSON or column format).
  • PNG files stored as binary blobs.
• Query Optimization:
  • JOINs will be used for participant-level queries.
  • Materialized views can be considered for frequently used summaries.

>>Security & Access Control
• Currently there is a single database user, so permissions are simple.
• Future security measures:
  • Row-level security for multiple users.
  • Encryption for sensitive participant records.

>>Backup & Recovery
• Daily backups of database storage + binary files.
• Azure Blob Storage or PostgreSQL Large Objects for efficient handling of PNG & CSV files.

Next Step: SQL Schema Implementation

Would you like the SQL schema to be written for PostgreSQL, MySQL, or another database system?
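The entities and keys summarized above can be sketched as a concrete schema. The following is a minimal, illustrative version using SQLite so it is self-contained and runnable; the production target would be PostgreSQL, and all column names beyond those named in the summary (e.g. `run_number`, `png_blob`) are assumptions, not the final schema.

```python
import sqlite3

# Self-contained sketch of the summarized schema, using an in-memory SQLite DB.
conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE study_types (
    study_id INTEGER PRIMARY KEY,
    name     TEXT NOT NULL UNIQUE          -- 'observation' or 'intervention'
);
CREATE TABLE sites (
    site_id INTEGER PRIMARY KEY,
    name    TEXT NOT NULL UNIQUE
);
CREATE TABLE participants (
    participant_id INTEGER PRIMARY KEY,
    study_id       INTEGER NOT NULL REFERENCES study_types(study_id),
    site_id        INTEGER NOT NULL REFERENCES sites(site_id)
);
CREATE TABLE tasks (
    task_id INTEGER PRIMARY KEY,
    name    TEXT NOT NULL UNIQUE           -- the 13 predefined tasks
);
CREATE TABLE runs (
    run_id         INTEGER PRIMARY KEY,
    participant_id INTEGER NOT NULL REFERENCES participants(participant_id),
    task_id        INTEGER NOT NULL REFERENCES tasks(task_id),
    run_number     INTEGER NOT NULL CHECK (run_number IN (1, 2)),
    run_timestamp  TEXT,                   -- nullable, extracted from CSVs
    UNIQUE (participant_id, task_id, run_number)
);
CREATE TABLE results (
    result_id INTEGER PRIMARY KEY,
    run_id    INTEGER NOT NULL REFERENCES runs(run_id),
    csv_data  TEXT NOT NULL                -- raw CSV contents (or JSON)
);
CREATE TABLE reports (
    report_id        INTEGER PRIMARY KEY,
    run_id           INTEGER NOT NULL REFERENCES runs(run_id),
    png_blob         BLOB,                 -- PNG stored as a binary blob
    missing_png_flag INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX idx_runs_participant ON runs(participant_id);
CREATE INDEX idx_runs_task ON runs(task_id);
""")

# Smoke test: one participant, one task, two runs.
conn.execute("INSERT INTO study_types (name) VALUES ('observation')")
conn.execute("INSERT INTO sites (name) VALUES ('site-A')")
conn.execute("INSERT INTO participants (study_id, site_id) VALUES (1, 1)")
conn.execute("INSERT INTO tasks (name) VALUES ('flanker')")
conn.execute("INSERT INTO runs (participant_id, task_id, run_number) VALUES (1, 1, 1)")
conn.execute("INSERT INTO runs (participant_id, task_id, run_number) VALUES (1, 1, 2)")
n_runs = conn.execute(
    "SELECT COUNT(*) FROM runs WHERE participant_id = 1").fetchone()[0]
print(n_runs)  # 2
```

In PostgreSQL the `INTEGER PRIMARY KEY` columns would become `SERIAL`/`IDENTITY`, `png_blob` would be `BYTEA` (or a large object), and `csv_data` could be `JSONB`, matching the storage considerations above.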

app/__pycache__/app.cpython-39.pyc

-2.04 KB
Binary file not shown.
-3.49 KB
Binary file not shown.
-1.61 KB
Binary file not shown.

app/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def serve_data_file(subpath):
 def create_app():
     app = Flask(__name__)
-    app.config['DATA_FOLDER'] = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data', 'test'))
+    app.config['DATA_FOLDER'] = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data'))
     app.config['ALLOWED_EXTENSIONS'] = {'csv', 'txt', 'png'}

     # Ensure the data folder exists
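The one-line change above points `DATA_FOLDER` at the parent `data/` directory instead of `data/test/`. A quick sketch of how the two expressions resolve, with a hypothetical module location standing in for `os.path.dirname(__file__)`:

```python
import os

# Hypothetical location of app/app.py, for illustration only.
module_dir = "/srv/app"

old_path = os.path.abspath(os.path.join(module_dir, "..", "data", "test"))
new_path = os.path.abspath(os.path.join(module_dir, "..", "data"))
print(old_path)  # /srv/data/test
print(new_path)  # /srv/data
```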

app/db.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
import os
import logging

import psycopg

from main.update_db import DatabaseUtils


# Database connection setup
def connect_to_db(db_name, user, password, host="localhost", port=5432):
    return psycopg.connect(dbname=db_name, user=user, password=password, host=host, port=port)


# Initialize database schema
def initialize_schema(connection):
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS study (
                    id SERIAL PRIMARY KEY,
                    name VARCHAR(50) UNIQUE NOT NULL
                );

                CREATE TABLE IF NOT EXISTS site (
                    id SERIAL PRIMARY KEY,
                    name VARCHAR(50) NOT NULL,
                    study_id INT REFERENCES study(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS subject (
                    id SERIAL PRIMARY KEY,
                    name VARCHAR(50) NOT NULL,
                    site_id INT REFERENCES site(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS task (
                    id SERIAL PRIMARY KEY,
                    name VARCHAR(50) NOT NULL,
                    subject_id INT REFERENCES subject(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS session (
                    id SERIAL PRIMARY KEY,
                    session_name VARCHAR(50) NOT NULL,
                    category INT NOT NULL,
                    csv_path TEXT,
                    plot_paths TEXT[],
                    task_id INT REFERENCES task(id) ON DELETE CASCADE,
                    date TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
                );
            """)
        connection.commit()
    except Exception as e:
        logging.error(f"Error initializing schema: {e}")
        connection.rollback()
    # Note: the connection is left open so callers can keep using it
    # (populate_database below runs on the same connection).


def _get_or_insert(cursor, insert_sql, insert_params, select_sql, select_params):
    """Insert a row if it is new, then return its id either way."""
    cursor.execute(insert_sql, insert_params)
    row = cursor.fetchone()
    if row is None:  # row already existed; the INSERT was a no-op
        cursor.execute(select_sql, select_params)
        row = cursor.fetchone()
    return row[0]


# Populate the database from the folder structure
def populate_database(connection, data_folder):
    for study_name in os.listdir(data_folder):
        study_path = os.path.join(data_folder, study_name)
        if not os.path.isdir(study_path):
            continue

        with connection.cursor() as cursor:
            study_id = _get_or_insert(
                cursor,
                "INSERT INTO study (name) VALUES (%s) ON CONFLICT (name) DO NOTHING RETURNING id;",
                (study_name,),
                "SELECT id FROM study WHERE name = %s;",
                (study_name,),
            )

        for site_name in os.listdir(study_path):
            site_path = os.path.join(study_path, site_name)
            if not os.path.isdir(site_path):
                continue

            with connection.cursor() as cursor:
                site_id = _get_or_insert(
                    cursor,
                    "INSERT INTO site (name, study_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                    (site_name, study_id),
                    "SELECT id FROM site WHERE name = %s AND study_id = %s;",
                    (site_name, study_id),
                )

            for subject_name in os.listdir(site_path):
                subject_path = os.path.join(site_path, subject_name)
                if not os.path.isdir(subject_path):
                    continue

                with connection.cursor() as cursor:
                    subject_id = _get_or_insert(
                        cursor,
                        "INSERT INTO subject (name, site_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                        (subject_name, site_id),
                        "SELECT id FROM subject WHERE name = %s AND site_id = %s;",
                        (subject_name, site_id),
                    )

                for task_name in os.listdir(subject_path):
                    task_path = os.path.join(subject_path, task_name)
                    if not os.path.isdir(task_path):
                        continue

                    with connection.cursor() as cursor:
                        task_id = _get_or_insert(
                            cursor,
                            "INSERT INTO task (name, subject_id) VALUES (%s, %s) ON CONFLICT DO NOTHING RETURNING id;",
                            (task_name, subject_id),
                            "SELECT id FROM task WHERE name = %s AND subject_id = %s;",
                            (task_name, subject_id),
                        )

                    for folder in ["data", "plot"]:
                        folder_path = os.path.join(task_path, folder)
                        if not os.path.exists(folder_path):
                            continue

                        if folder == "data":
                            for file in os.listdir(folder_path):
                                if file.endswith(".csv"):
                                    # Filenames encode the session and category,
                                    # e.g. <prefix>_ses-<name>_cat-<n>.csv
                                    parts = file.split("_")
                                    session_name = parts[1].split("-")[1]
                                    category = int(parts[2].split("-")[1].split(".")[0])

                                    with connection.cursor() as cursor:
                                        cursor.execute("""
                                            INSERT INTO session (session_name, category, csv_path, task_id)
                                            VALUES (%s, %s, %s, %s)
                                            ON CONFLICT DO NOTHING;
                                        """, (session_name, category, os.path.join(folder_path, file), task_id))

                        elif folder == "plot":
                            plots = [os.path.join(folder_path, f)
                                     for f in os.listdir(folder_path)
                                     if f.endswith(".png")]

                            with connection.cursor() as cursor:
                                cursor.execute("""
                                    UPDATE session
                                    SET plot_paths = %s
                                    WHERE task_id = %s;
                                """, (plots, task_id))

    connection.commit()


# Main entry point
if __name__ == "__main__":
    db_name = "boostbeh"
    user = "zakg04"
    password = "*mIloisfAT23*123*"  # TODO: move credentials out of source control
    data_folder = "../data"
    connection = connect_to_db(db_name, user, password)
    util_instance = DatabaseUtils(connection, data_folder)
    util_instance.update_database()

    """conn = connect_to_db(db_name, user, password)
    try:
        initialize_schema(conn)
        populate_database(conn, data_folder)
        print("Database initialized and populated successfully.")
    finally:
        conn.close()"""
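The populate logic in db.py repeatedly needs "insert the row if it is new, then fetch its id either way". A minimal standalone sketch of that pattern, using SQLite so it runs anywhere (the helper name `get_or_create` and the `study` table here are illustrative; the real code targets PostgreSQL, where `INSERT ... ON CONFLICT DO NOTHING RETURNING id` followed by a fallback `SELECT` does the same job):

```python
import sqlite3

def get_or_create(cursor, table, name):
    """Return the id of `name` in `table`, inserting it first if absent.
    Table name is interpolated directly, so it must be a trusted constant."""
    cursor.execute(f"SELECT id FROM {table} WHERE name = ?", (name,))
    row = cursor.fetchone()
    if row is not None:
        return row[0]
    cursor.execute(f"INSERT INTO {table} (name) VALUES (?)", (name,))
    return cursor.lastrowid

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE study (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL)")
cur = conn.cursor()
a = get_or_create(cur, "study", "BOOST")
b = get_or_create(cur, "study", "BOOST")  # second call finds the same row
print(a == b)  # True
```

Wrapping the two-step check in one helper keeps the id-lookup logic in a single place instead of repeating it at every nesting level of the directory walk.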
4.82 KB
Binary file not shown.
-3.13 KB
Binary file not shown.
