Added scripts

fzakirov · fzakirov · commit dd23064929bd · 2024-12-09T14:35:10.000-05:00
diff --git a/code/file_wrangling/concatenate_tf.ipynb b/code/file_wrangling/concatenate_tf.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "c77c88b8-2a44-4cbb-92db-7e4af858bc75",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mne\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from glob import glob\n",
+    "import scipy.io\n",
+    "import h5py\n",
+    "import os\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7429d1aa-cd96-4d79-af58-27dc4e85eaf0",
+   "metadata": {},
+   "source": [
+    "## Concatenate per-subject TF, ITPS, ICPS and wPLI arrays"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "id": "70aab6cb-cf53-456b-acd0-4a67c2fb71fa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "132"
+      ]
+     },
+     "execution_count": 147,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Stack each subject's array into one array per measure and condition and\n",
+    "# save it, together with the contributing subject IDs, as a .mat file.\n",
+    "csv_path = \"derivatives/behavior/\"\n",
+    "output_path = \"derivatives/preprocessed/TF_arrays/\"\n",
+    "expected_shape = (64, 375, 59)  # shape every per-subject array must have\n",
+    "\n",
+    "# take IDs from fully processed behavioral data (checked for accuracy, validRT, missed responses) separately for each condition\n",
+    "sub_nonsoc = list(pd.read_csv(f\"{csv_path}thrive_data_nonsoc.csv\")[\"sub\"])\n",
+    "sub_soc = list(pd.read_csv(f\"{csv_path}thrive_data_soc.csv\")[\"sub\"])\n",
+    "subjects_by_group = {\"s\": sub_soc, \"ns\": sub_nonsoc}\n",
+    "\n",
+    "for measure in [\"TF\", \"ITPS\", \"ICPS\", \"wPLI\"]:\n",
+    "    # the array is selected by its position among the file's top-level keys\n",
+    "    key_idx = 1 if measure in (\"ITPS\", \"ICPS\") else -1\n",
+    "    data_path = f\"derivatives/preprocessed/TF_outputs/main/resp/{measure}/\"\n",
+    "    for condition in tqdm([\"resp_ns_c_1\", \"resp_ns_i_0\", \"resp_ns_i_1\",\n",
+    "                           \"resp_s_i_1\", \"resp_s_c_1\", \"resp_s_i_0\"]):\n",
+    "        # the second token of the condition name (\"s\" / \"ns\") selects the subject list\n",
+    "        valid_sub_list = subjects_by_group[condition.split(\"_\")[1]]\n",
+    "\n",
+    "        arr_list = []\n",
+    "        subjects_with_data = []\n",
+    "        for sub_id in valid_sub_list:\n",
+    "            tf_files = sorted(glob(f\"{data_path}/sub-{sub_id}*{measure}*{condition}*.mat\"))\n",
+    "            if not tf_files:\n",
+    "                continue  # this subject has no file for this measure/condition\n",
+    "            try:\n",
+    "                # copy the array into memory so the HDF5 file can be closed right away\n",
+    "                with h5py.File(tf_files[0], \"r\") as data_file:\n",
+    "                    key_list = list(data_file.keys())\n",
+    "                    data = data_file[key_list[key_idx]][()]\n",
+    "            except (OSError, KeyError, IndexError) as err:\n",
+    "                print(f\"Skipping {tf_files[0]}: {err!r}\")\n",
+    "                continue\n",
+    "            if data.shape != expected_shape:\n",
+    "                # report a bad shape instead of dropping the subject silently\n",
+    "                print(f\"Skipping {tf_files[0]}: shape {data.shape}, expected {expected_shape}\")\n",
+    "                continue\n",
+    "            arr_list.append(data)\n",
+    "            subjects_with_data.append(sub_id)\n",
+    "\n",
+    "        if not arr_list:\n",
+    "            print(f\"No usable {measure} files for {condition}; nothing saved\")\n",
+    "            continue\n",
+    "        full_data = np.stack(arr_list, axis=0)\n",
+    "        assert full_data.shape[0] == len(subjects_with_data), \"Check your data!\"\n",
+    "        scipy.io.savemat(f\"{output_path}/{measure}_{condition}.mat\",\n",
+    "                         {f\"{measure}_{condition}\": full_data,\n",
+    "                          \"subjects\": subjects_with_data})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ddfa7622-4174-4289-9185-e1500d1b5e4b",
+   "metadata": {},
+   "source": [
+    "## Inspect number of events"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "546b567f-71fd-4d39-b9ed-b5fe5d794584",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "# Count, for every subject folder matching `sub_to_inspect`, how many retained\n",
+    "# events of each trial type the preprocessed EEGLAB .set file contains.\n",
+    "# `sub_to_inspect` is used as a glob fragment, so every folder whose name\n",
+    "# contains it is processed.\n",
+    "sub_to_inspect = \"192\"\n",
+    "dataset_path = \"/home/data/NDClab/datasets/thrive-dataset/\"\n",
+    "\n",
+    "# output column -> (observation, eventType, congruency, accuracy) to match\n",
+    "event_specs = {\n",
+    "    \"s_resp_incon_error\": (\"s\", \"resp\", \"i\", 0),\n",
+    "    \"s_resp_incon_corr\": (\"s\", \"resp\", \"i\", 1),\n",
+    "    \"ns_resp_incon_error\": (\"ns\", \"resp\", \"i\", 0),\n",
+    "    \"ns_resp_incon_corr\": (\"ns\", \"resp\", \"i\", 1),\n",
+    "    \"s_stim_incon_corr\": (\"s\", \"stim\", \"i\", 1),\n",
+    "    \"s_stim_con_corr\": (\"s\", \"stim\", \"c\", 1),\n",
+    "    \"ns_stim_incon_corr\": (\"ns\", \"stim\", \"i\", 1),\n",
+    "    \"ns_stim_con_corr\": (\"ns\", \"stim\", \"c\", 1),\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def count_events(events, observation, event_type, congruency, accuracy):\n",
+    "    \"\"\"Count the events that belong to one trial type.\n",
+    "\n",
+    "    An event is counted when its `observation`, `eventType`, `congruency`\n",
+    "    and `accuracy` fields equal the given values and it additionally has\n",
+    "    `responded == 1`, `validRt == 1` and `extraResponse == 0`.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    events : iterable\n",
+    "        Event structs exposing the fields listed above as attributes.\n",
+    "    observation, event_type, congruency : str\n",
+    "        Required values of `observation`, `eventType` and `congruency`.\n",
+    "    accuracy : int\n",
+    "        Required value of the `accuracy` field.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    int\n",
+    "        Number of matching events.\n",
+    "    \"\"\"\n",
+    "    return sum(\n",
+    "        1 for ev in events\n",
+    "        if (ev.observation == observation)\n",
+    "        & (ev.eventType == event_type)\n",
+    "        & (ev.congruency == congruency)\n",
+    "        & (ev.accuracy == accuracy)\n",
+    "        & (ev.responded == 1)\n",
+    "        & (ev.validRt == 1)\n",
+    "        & (ev.extraResponse == 0)\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "# one list per output column; each processed file appends one value to each\n",
+    "trial_data = {\"sub\": [], **{column: [] for column in event_specs}}\n",
+    "\n",
+    "# processed EEG files (s1_r1) inside the matching subject folder(s)\n",
+    "list_of_eeg_file = sorted(\n",
+    "    glob(\n",
+    "        f\"{dataset_path}derivatives/preprocessed/*{sub_to_inspect}*/s1_r1/eeg/*all_eeg_processed_data*.set\")\n",
+    ")\n",
+    "\n",
+    "# Read each subject folder name from the file's own path so that IDs and\n",
+    "# files cannot get out of step, as they could with two independent globs.\n",
+    "sub_ids = [filename.split(\"/\")[-4] for filename in list_of_eeg_file]\n",
+    "\n",
+    "# time the whole loop\n",
+    "start = time.time()\n",
+    "\n",
+    "for sub_dir, filename in zip(sub_ids, list_of_eeg_file):\n",
+    "    sub_id = sub_dir.split(\"-\")[-1]\n",
+    "    trial_data[\"sub\"].append(sub_id)\n",
+    "    # struct_as_record=False makes the EEG struct fields available as attributes\n",
+    "    # (EEG.srate, EEG.event, ...).\n",
+    "    EEG = scipy.io.loadmat(filename, squeeze_me=True, struct_as_record=False)[\"EEG\"]\n",
+    "    sr = EEG.srate\n",
+    "\n",
+    "    # squeeze_me turns a one-element event array into a bare struct;\n",
+    "    # atleast_1d makes it iterable again.\n",
+    "    all_events = np.atleast_1d(EEG.event)\n",
+    "\n",
+    "    # Keep only events whose converted latency lies within +/- 0.1 of zero.\n",
+    "    # NOTE(review): eeg_point2lat is neither defined nor imported in this\n",
+    "    # notebook, so it has to exist in the kernel already -- confirm where\n",
+    "    # it comes from and import it explicitly.\n",
+    "    # presumably EEG.xmin/xmax are in seconds and the window is therefore passed\n",
+    "    # in ms (timeunit=1e-3) -- TODO confirm against the helper's definition\n",
+    "    events = []\n",
+    "    for ev in all_events:\n",
+    "        latency = eeg_point2lat(\n",
+    "            [ev.latency],\n",
+    "            [ev.epoch],\n",
+    "            sr,\n",
+    "            timewin=[EEG.xmin*1000, EEG.xmax*1000],\n",
+    "            timeunit=1e-3,\n",
+    "        )\n",
+    "        if latency >= -.1 and latency <= .1:\n",
+    "            events.append(ev)\n",
+    "\n",
+    "    print(f\"sub-{sub_id}: {len(events)} good events were found!\")\n",
+    "\n",
+    "    # one count per trial type for this subject\n",
+    "    for column, spec in event_specs.items():\n",
+    "        trial_data[column].append(count_events(events, *spec))\n",
+    "\n",
+    "end = time.time()\n",
+    "print(f\"Executed time {np.round(end - start, 2)} s\")\n",
+    "\n",
+    "# one row per processed file, one column per trial type\n",
+    "pd.DataFrame(trial_data)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}